PyPI - wordlesmith - Versions diffs - 0.1.0__tar.gz - Mend

wordlesmith 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

wordlesmith-0.1.0/.gitignore +42 -0
wordlesmith-0.1.0/CHANGELOG.md +47 -0
wordlesmith-0.1.0/LICENSE.txt +21 -0
wordlesmith-0.1.0/PKG-INFO +300 -0
wordlesmith-0.1.0/README.md +247 -0
wordlesmith-0.1.0/benchmarks/results/official/curated_all.csv +4 -0
wordlesmith-0.1.0/benchmarks/results/official/curated_all.md +5 -0
wordlesmith-0.1.0/benchmarks/results/official/curated_answers.csv +6 -0
wordlesmith-0.1.0/benchmarks/results/official/curated_answers.md +7 -0
wordlesmith-0.1.0/benchmarks/results/official/distribution_curated.png +0 -0
wordlesmith-0.1.0/benchmarks/results/official/distribution_curated_all.png +0 -0
wordlesmith-0.1.0/benchmarks/results/official/distribution_valid.png +0 -0
wordlesmith-0.1.0/benchmarks/results/official/meta.txt +4 -0
wordlesmith-0.1.0/benchmarks/results/official/per_word_curated.csv +2316 -0
wordlesmith-0.1.0/benchmarks/results/official/per_word_valid.csv +14856 -0
wordlesmith-0.1.0/benchmarks/results/official/primary_valid.csv +6 -0
wordlesmith-0.1.0/benchmarks/results/official/primary_valid.md +7 -0
wordlesmith-0.1.0/docs/demo.gif +0 -0
wordlesmith-0.1.0/docs/demo.tape +34 -0
wordlesmith-0.1.0/docs/solve.gif +0 -0
wordlesmith-0.1.0/docs/solve.tape +30 -0
wordlesmith-0.1.0/docs/strategies.md +214 -0
wordlesmith-0.1.0/examples/demo.ipynb +192 -0
wordlesmith-0.1.0/pyproject.toml +102 -0
wordlesmith-0.1.0/scripts/bench_one.py +39 -0
wordlesmith-0.1.0/scripts/gen_openings.py +95 -0
wordlesmith-0.1.0/scripts/run_official_benchmark.py +85 -0
wordlesmith-0.1.0/src/wordlesmith/__init__.py +41 -0
wordlesmith-0.1.0/src/wordlesmith/benchmark.py +243 -0
wordlesmith-0.1.0/src/wordlesmith/cli.py +303 -0
wordlesmith-0.1.0/src/wordlesmith/data/curated_answers.txt +2318 -0
wordlesmith-0.1.0/src/wordlesmith/data/openings.json +10 -0
wordlesmith-0.1.0/src/wordlesmith/data/valid_words.txt +14859 -0
wordlesmith-0.1.0/src/wordlesmith/feedback.py +87 -0
wordlesmith-0.1.0/src/wordlesmith/game.py +92 -0
wordlesmith-0.1.0/src/wordlesmith/py.typed +0 -0
wordlesmith-0.1.0/src/wordlesmith/strategies/__init__.py +49 -0
wordlesmith-0.1.0/src/wordlesmith/strategies/base.py +128 -0
wordlesmith-0.1.0/src/wordlesmith/strategies/entropy.py +24 -0
wordlesmith-0.1.0/src/wordlesmith/strategies/expected_size.py +19 -0
wordlesmith-0.1.0/src/wordlesmith/strategies/frequency.py +42 -0
wordlesmith-0.1.0/src/wordlesmith/strategies/minimax.py +18 -0
wordlesmith-0.1.0/src/wordlesmith/strategies/random_guess.py +20 -0
wordlesmith-0.1.0/src/wordlesmith/words.py +51 -0
wordlesmith-0.1.0/tests/test_benchmark.py +112 -0
wordlesmith-0.1.0/tests/test_cli.py +93 -0
wordlesmith-0.1.0/tests/test_feedback.py +90 -0
wordlesmith-0.1.0/tests/test_game.py +60 -0
wordlesmith-0.1.0/tests/test_strategies.py +111 -0
wordlesmith-0.1.0/tests/test_words.py +45 -0

wordlesmith-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,42 @@
+# Python
+__pycache__/
+*.py[cod]
+*.egg-info/
+.eggs/
+build/
+dist/
+.venv/
+venv/
+env/
+# Tooling caches
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+.coverage
+htmlcov/
+coverage.xml
+# Benchmark output. Only the official committed run is tracked, and within it
+# only the compact artifacts (CSV / Markdown / PNG / per-word). The large
+# combined JSON and the raw per-result checkpoints are regenerable, so ignore
+# them: `python scripts/run_official_benchmark.py` rebuilds everything.
+benchmarks/results/*
+!benchmarks/results/official/
+benchmarks/results/official/raw/
+benchmarks/results/official/*.json
+# OS / editor
+.DS_Store
+.idea/
+.vscode/
+*.swp
+# Local planning / writing notes
+plan.html
+docs/blog-post.md
+docs/paper-outline.md
+docs/pypi-plan.md
+# Jupyter
+.ipynb_checkpoints/

wordlesmith-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,47 @@
+# Changelog
+All notable changes to this project are documented here. The format is based on
+[Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project
+adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.1.0] - Unreleased
+First packaged release. Rewrites the original exploratory notebook into an
+installable, tested Python package with a CLI and a strategy-comparison
+benchmark suite.
+### Added
+- `wordlesmith` package with a zero-dependency core (pure standard library).
+- Exact Wordle feedback scoring with correct duplicate-letter handling, encoded
+  as base-3 patterns (`feedback`, `pattern_to_string`, `pattern_from_string`).
+- `GameState` / `simulate` engine that filters candidates purely by
+  pattern-consistency.
+- Five strategies behind a common interface: `frequency` (the original
+  positional-frequency baseline), `entropy`, `expected-size`, `minimax`, and a
+  `random` control. Scoring strategies support both `answers` and `all` guess
+  pools.
+- Packaged word lists: the full 14,855-word valid-guess list (the default
+  answer and guess pool) and the original 2,315-word curated answer set, plus a
+  precomputed opening-guess table for fast first moves.
+- `--curated` flag (and `load_curated_answers`) to run against the original
+  2,315-word solution set instead of the full valid list.
+- `wordlesmith` CLI with `play`, `solve`, `benchmark`, and `compare`
+  subcommands (argparse, standard library only).
+- Benchmark runner with average/median/max/fail metrics, guess-distribution
+  histograms, and CSV/JSON/Markdown output; optional plots via the `bench`
+  extra.
+- Test suite (pytest) covering duplicate-letter scoring edge cases,
+  pattern-consistency filtering, strategy selection, and the CLI.
+- GitHub Actions CI (lint, type-check, test matrix on Python 3.10-3.13, build).
+### Changed
+- The engine no longer tracks greens/yellows/grays with ad-hoc bookkeeping. The
+  pattern-consistency filter fixes latent duplicate-letter bugs in the original
+  notebook, which slightly improves the frequency baseline (about 3.64 vs the
+  previously reported 3.67 average).
+- The solver now considers every valid word a possible answer by default,
+  instead of only the original 2,315 solutions. The NYT has revised the answer
+  set over time (for example MAVEN, a real answer, is not in the original list),
+  so the old default could dead-end on a legitimate puzzle. The full-valid pool
+  never does, at the cost of a somewhat higher average; the classic numbers are
+  still available with `--curated`.

wordlesmith-0.1.0/LICENSE.txt ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Aditya Mehrotra
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

wordlesmith-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,300 @@
+Metadata-Version: 2.4
+Name: wordlesmith
+Version: 0.1.0
+Summary: A Wordle solver with pluggable strategies and a strategy-comparison benchmark suite.
+Project-URL: Homepage, https://github.com/adityakmehrotra/wordlesmith
+Project-URL: Repository, https://github.com/adityakmehrotra/wordlesmith
+Project-URL: Issues, https://github.com/adityakmehrotra/wordlesmith/issues
+Author-email: Aditya Mehrotra <adi1.mehrotra@gmail.com>
+License: MIT License
+        Copyright (c) 2026 Aditya Mehrotra
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE.txt
+Keywords: benchmark,entropy,game,puzzle,solver,wordle
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Games/Entertainment :: Puzzle Games
+Classifier: Typing :: Typed
+Requires-Python: >=3.10
+Provides-Extra: bench
+Requires-Dist: matplotlib>=3.6; extra == 'bench'
+Provides-Extra: dev
+Requires-Dist: build>=1.0; extra == 'dev'
+Requires-Dist: mypy>=1.8; extra == 'dev'
+Requires-Dist: pytest-cov>=4.1; extra == 'dev'
+Requires-Dist: pytest>=7.4; extra == 'dev'
+Requires-Dist: ruff>=0.5; extra == 'dev'
+Requires-Dist: twine>=5.0; extra == 'dev'
+Description-Content-Type: text/markdown
+<a id="readme-top"></a>
+<div align="center">
+[![CI][ci-shield]][ci-url]
+[![Python 3.10+][python-shield]][python-url]
+[![MIT License][license-shield]][license-url]
+# wordlesmith
+A Wordle solver with pluggable strategies and a benchmark suite for comparing them.
+Considers every valid word a possible answer, so it never dead-ends on a real puzzle (entropy
+averages 4.52 guesses over all 14,855 valid words, and 3.60 on the classic 2,315-answer set). The
+core is pure standard library.
+<img src="https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/docs/demo.gif" alt="wordlesmith playing along with a Wordle" width="760">
+</div>
+---
+## Contents
+- [What it does](#what-it-does)
+- [Install](#install)
+- [Quickstart](#quickstart)
+- [Benchmark](#benchmark)
+- [How it works](#how-it-works)
+- [Strategies](#strategies) ([in-depth](docs/strategies.md))
+- [Limitations](#limitations)
+- [Development](#development)
+- [License &amp; contact](#license--contact)
+## What it does
+`wordlesmith` is a command-line and library Wordle solver. It ships:
+- A Wordle scoring engine that handles duplicate letters correctly, which is where most
+  solvers have subtle bugs.
+- Five strategies behind one interface: positional frequency, entropy, expected remaining
+  size, minimax, and a random control.
+- A benchmark framework that plays every valid word and reports the full guess distribution.
+- The full 14,855-word valid-guess list (the default answer pool, so it never dead-ends on a
+  real puzzle) and the original 2,315-word answer set, packaged with a precomputed
+  opening-guess table so the first move is instant.
+The core has no third-party dependencies. Plotting is the only extra.
+## Install
+```bash
+# From GitHub
+pip install "git+https://github.com/adityakmehrotra/wordlesmith"
+# For development (tests, lint, plots)
+git clone https://github.com/adityakmehrotra/wordlesmith
+cd wordlesmith
+pip install -e ".[dev,bench]"
+```
+Requires Python 3.10+.
+## Quickstart
+### Command line
+Auto-solve a known word:
+<img src="https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/docs/solve.gif" alt="wordlesmith solve maven, then solve crane --curated" width="720">
+(`maven` is a real NYT answer that isn't in the original 2,315-word list, so a solver built only
+on that list would never find it. The default pool is every valid word, so this just works.)
+Play along with a real puzzle: it suggests a guess, you type the colors back
+(`g`=green, `y`=yellow, `x`=gray):
+```console
+$ wordlesmith play --strategy entropy
+Turn 1 suggestion: TARES   (14855 candidates)
+Enter feedback: xgxgx
+Turn 2 suggestion: LADEN   (150 candidates)
+Enter feedback: ...
+```
+Benchmark one strategy, or compare several:
+```console
+$ wordlesmith benchmark --strategy entropy --sample 300
+$ wordlesmith compare --strategies frequency,entropy,minimax --markdown
+$ wordlesmith compare --curated --markdown          # the classic 2,315-answer set
+```
+Run `wordlesmith --help` (or `wordlesmith <command> --help`) for all options,
+including `--curated`, `--guess-pool all`, `--jobs` for parallel benchmarks, and
+`--answers`/`--allowed` for custom word lists.
+### Python API
+```python
+from wordlesmith import get_strategy, simulate, feedback, pattern_to_string
+# Score a guess against a target (base-3 pattern; g/y/x string for humans)
+print(pattern_to_string(feedback("speed", "abide")))  # -> xxyxy
+# Auto-play a word
+result = simulate("maven", get_strategy("entropy"))
+print(result.turns, result.guesses)  # -> 3 ['tares', 'laden', 'maven']
+```
+## Benchmark
+Lower average is better; `max` is the worst game (`>6` means at least one game was not solved);
+`fail%` is the percentage of games not solved within six guesses.
+### Primary: every valid word (the default)
+Each strategy plays all 14,855 valid words, guessing from the words still consistent with the
+feedback. This is how the solver actually runs, so it never dead-ends on a real puzzle:
+| strategy | pool | avg | max | fail% |
+| --- | --- | --- | --- | --- |
+| random | answers | 5.061 | >6 | 16.68 |
+| frequency | answers | 4.922 | >6 | 14.57 |
+| minimax | answers | 4.658 | >6 | 11.29 |
+| expected-size | answers | 4.585 | >6 | 10.57 |
+| entropy | answers | 4.523 | >6 | 9.47 |
+The averages are higher and the failure rate is non-trivial (about 9% even for entropy) because the
+full valid list is packed with near-identical clusters (`match`/`batch`/`catch`/`hatch`/..., the
+`-ound` and `-ight` families, plus many obscure words) that simply cannot be separated in six
+guesses. Those hard words are almost never real NYT answers, so for actual daily play the curated
+number below is the realistic one; this table is the pessimistic "solve literally any valid word"
+figure.
+![Guess distribution by strategy](https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/benchmarks/results/official/distribution_valid.png)
+### Secondary: the classic 2,315-answer set (`--curated`)
+Restricted to the original Wordle solution set, the problem is easier and the numbers are
+comparable to published solvers. The `all` pool (guessing any word for information) gets close
+to the known optimum of about 3.421:
+| strategy | pool | avg | max | fail% |
+| --- | --- | --- | --- | --- |
+| random | answers | 4.039 | >6 | 0.82 |
+| frequency | answers | 3.640 | >6 | 0.60 |
+| expected-size | answers | 3.623 | >6 | 0.60 |
+| minimax | answers | 3.677 | >6 | 0.65 |
+| entropy | answers | 3.598 | >6 | 0.48 |
+| entropy | all | 3.465 | 6 | 0.00 |
+| expected-size | all | 3.481 | 5 | 0.00 |
+| minimax | all | 3.573 | 6 | 0.00 |
+A concrete example of what the smart strategies buy you: solving `mound` on the curated set, the
+frequency baseline burns turns cycling through lookalikes (`slate`, `crony`, `bound`, `found`,
+`hound`, `mound`) while entropy picks a splitting guess and finishes in three (`raise`, `mulch`,
+`mound`).
+<sub>Methodology: a game is a failure if unsolved in 6 guesses (counted as 7 in the mean).
+Deterministic strategies are reproducible; `random` uses a fixed seed. Full results and per-word
+data are in [`benchmarks/results/official/`](https://github.com/adityakmehrotra/wordlesmith/tree/main/benchmarks/results/official); regenerate the primary
+run with `python scripts/run_official_benchmark.py`. The primary `answers`-pool run takes about 10
+minutes per strategy on 9 cores; the `all`-pool run scores every valid word each turn and is far
+slower per game (roughly 9-10 minutes of wall time per strategy even on the 2,315-word curated
+set), which is why it stays on the smaller curated set. Use `--sample N` for a quick estimate.</sub>
+## How it works
+Scoring: Wordle feedback is computed in two passes. Greens are assigned first and each
+consumes its letter in the target; yellows are then assigned left to right, each consuming
+a remaining occurrence. A guess letter with no occurrence left is gray. This is why the
+second `E` in `SPEED` is gray against `ABIDE`, which has only one `E`.
+Filtering: after each guess the solver keeps a word `w` only if `feedback(guess, w)`
+equals the pattern actually observed. This single rule handles every duplicate-letter case
+correctly, so there is no separate (and bug-prone) tracking of which letters are "in" or
+"out".
+Word lists: by default every valid Wordle word is treated as a possible answer. The original
+Wordle solution set was only 2,315 words, but the NYT has revised it over time, so a solver
+built on that list can dead-end on a legitimate answer it never considered (`maven`, for
+instance). Using the full valid list avoids that, at the cost of a somewhat higher average
+since there are more words to tell apart. Pass `--curated` to fall back to the original
+2,315-answer set (faster, and the numbers become comparable to published solvers).
+## Strategies
+| name | idea | good for |
+| --- | --- | --- |
+| `frequency` | Sum of per-position letter frequencies among candidates (the original baseline). | A strong, cheap heuristic. |
+| `entropy` | Maximize expected information (Shannon entropy of the feedback-bucket distribution). | Best average guess count. |
+| `expected-size` | Minimize the expected number of remaining candidates. | Simple, nearly as strong as entropy. |
+| `minimax` | Minimize the largest feedback bucket (worst case). | Smallest worst-case bucket per guess. |
+| `random` | Guess a random consistent word. | A control / baseline to beat. |
+The entropy, expected-size, and minimax strategies accept a `--guess-pool` of `answers`
+(guess from remaining candidates) or `all` (guess from the full allowed list).
+See [`docs/strategies.md`](https://github.com/adityakmehrotra/wordlesmith/blob/main/docs/strategies.md) for an in-depth explanation of each strategy:
+the scoring formulas, the bucket-splitting idea the information-theoretic strategies share
+(with a worked example), the guess-pool trade-off, and how to add your own strategy.
+## Limitations
+- **Pure Python is slow for the `all` guess pool.** Scoring every valid word each turn takes
+  minutes per benchmark, which is why the committed `all`-pool numbers stay on the curated set.
+  For a single interactive `solve`/`play` it's fine (the opening is precomputed).
+- **The word list is a snapshot.** `valid_words.txt` is the NYT valid-guess list as of mid-2025.
+  If the NYT adds words later, refresh it and regenerate the opening table.
+- **Six-guess failures are expected.** Over the full valid list even entropy fails about 9% of
+  games, because clusters like `match`/`batch`/`catch`/`hatch` or the `-ound`/`-ight` families
+  can't be separated in six turns. Those words are rarely real answers, so `--curated` is the
+  realistic daily-play figure.
+- **The strategies are greedy.** They optimize the current guess, not the whole game tree, so
+  even the best is a step behind the known optimal decision tree (about 3.421 on the curated set).
+- **English five-letter Wordle only.** No hard mode and no other word lengths (the engine assumes
+  five letters), though `--answers`/`--allowed` accept custom five-letter word lists.
+## Development
+```bash
+pip install -e ".[dev,bench]"
+pytest --cov=wordlesmith      # tests + coverage
+ruff check . && ruff format --check .
+mypy src/
+python -m build && twine check dist/*
+```
+Contributions welcome. A natural extension is adding a new strategy: implement `Strategy`,
+register it, and it shows up in `compare` automatically. Please open an
+[issue](https://github.com/adityakmehrotra/wordlesmith/issues) or PR.
+## License &amp; contact
+Distributed under the MIT License. See [`LICENSE.txt`](https://github.com/adityakmehrotra/wordlesmith/blob/main/LICENSE.txt).
+Aditya Mehrotra. Reach me at `adi1.mehrotra@gmail.com` or on
+[LinkedIn](https://www.linkedin.com/in/aditya-mehrotra-).
+<p align="right">(<a href="#readme-top">back to top</a>)</p>
+[ci-shield]: https://github.com/adityakmehrotra/wordlesmith/actions/workflows/ci.yml/badge.svg
+[ci-url]: https://github.com/adityakmehrotra/wordlesmith/actions/workflows/ci.yml
+[python-shield]: https://img.shields.io/badge/python-3.10%2B-blue
+[python-url]: https://www.python.org/downloads/
+[license-shield]: https://img.shields.io/badge/license-MIT-green
+[license-url]: https://github.com/adityakmehrotra/wordlesmith/blob/main/LICENSE.txt

wordlesmith-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,247 @@
+<a id="readme-top"></a>
+<div align="center">
+[![CI][ci-shield]][ci-url]
+[![Python 3.10+][python-shield]][python-url]
+[![MIT License][license-shield]][license-url]
+# wordlesmith
+A Wordle solver with pluggable strategies and a benchmark suite for comparing them.
+Considers every valid word a possible answer, so it never dead-ends on a real puzzle (entropy
+averages 4.52 guesses over all 14,855 valid words, and 3.60 on the classic 2,315-answer set). The
+core is pure standard library.
+<img src="https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/docs/demo.gif" alt="wordlesmith playing along with a Wordle" width="760">
+</div>
+---
+## Contents
+- [What it does](#what-it-does)
+- [Install](#install)
+- [Quickstart](#quickstart)
+- [Benchmark](#benchmark)
+- [How it works](#how-it-works)
+- [Strategies](#strategies) ([in-depth](docs/strategies.md))
+- [Limitations](#limitations)
+- [Development](#development)
+- [License &amp; contact](#license--contact)
+## What it does
+`wordlesmith` is a command-line and library Wordle solver. It ships:
+- A Wordle scoring engine that handles duplicate letters correctly, which is where most
+  solvers have subtle bugs.
+- Five strategies behind one interface: positional frequency, entropy, expected remaining
+  size, minimax, and a random control.
+- A benchmark framework that plays every valid word and reports the full guess distribution.
+- The full 14,855-word valid-guess list (the default answer pool, so it never dead-ends on a
+  real puzzle) and the original 2,315-word answer set, packaged with a precomputed
+  opening-guess table so the first move is instant.
+The core has no third-party dependencies. Plotting is the only extra.
+## Install
+```bash
+# From GitHub
+pip install "git+https://github.com/adityakmehrotra/wordlesmith"
+# For development (tests, lint, plots)
+git clone https://github.com/adityakmehrotra/wordlesmith
+cd wordlesmith
+pip install -e ".[dev,bench]"
+```
+Requires Python 3.10+.
+## Quickstart
+### Command line
+Auto-solve a known word:
+<img src="https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/docs/solve.gif" alt="wordlesmith solve maven, then solve crane --curated" width="720">
+(`maven` is a real NYT answer that isn't in the original 2,315-word list, so a solver built only
+on that list would never find it. The default pool is every valid word, so this just works.)
+Play along with a real puzzle: it suggests a guess, you type the colors back
+(`g`=green, `y`=yellow, `x`=gray):
+```console
+$ wordlesmith play --strategy entropy
+Turn 1 suggestion: TARES   (14855 candidates)
+Enter feedback: xgxgx
+Turn 2 suggestion: LADEN   (150 candidates)
+Enter feedback: ...
+```
+Benchmark one strategy, or compare several:
+```console
+$ wordlesmith benchmark --strategy entropy --sample 300
+$ wordlesmith compare --strategies frequency,entropy,minimax --markdown
+$ wordlesmith compare --curated --markdown          # the classic 2,315-answer set
+```
+Run `wordlesmith --help` (or `wordlesmith <command> --help`) for all options,
+including `--curated`, `--guess-pool all`, `--jobs` for parallel benchmarks, and
+`--answers`/`--allowed` for custom word lists.
+### Python API
+```python
+from wordlesmith import get_strategy, simulate, feedback, pattern_to_string
+# Score a guess against a target (base-3 pattern; g/y/x string for humans)
+print(pattern_to_string(feedback("speed", "abide")))  # -> xxyxy
+# Auto-play a word
+result = simulate("maven", get_strategy("entropy"))
+print(result.turns, result.guesses)  # -> 3 ['tares', 'laden', 'maven']
+```
+## Benchmark
+Lower average is better; `max` is the worst game (`>6` means at least one game was not solved);
+`fail%` is the percentage of games not solved within six guesses.
+### Primary: every valid word (the default)
+Each strategy plays all 14,855 valid words, guessing from the words still consistent with the
+feedback. This is how the solver actually runs, so it never dead-ends on a real puzzle:
+| strategy | pool | avg | max | fail% |
+| --- | --- | --- | --- | --- |
+| random | answers | 5.061 | >6 | 16.68 |
+| frequency | answers | 4.922 | >6 | 14.57 |
+| minimax | answers | 4.658 | >6 | 11.29 |
+| expected-size | answers | 4.585 | >6 | 10.57 |
+| entropy | answers | 4.523 | >6 | 9.47 |
+The averages are higher and the failure rate is non-trivial (about 9% even for entropy) because the
+full valid list is packed with near-identical clusters (`match`/`batch`/`catch`/`hatch`/..., the
+`-ound` and `-ight` families, plus many obscure words) that simply cannot be separated in six
+guesses. Those hard words are almost never real NYT answers, so for actual daily play the curated
+number below is the realistic one; this table is the pessimistic "solve literally any valid word"
+figure.
+![Guess distribution by strategy](https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/benchmarks/results/official/distribution_valid.png)
+### Secondary: the classic 2,315-answer set (`--curated`)
+Restricted to the original Wordle solution set, the problem is easier and the numbers are
+comparable to published solvers. The `all` pool (guessing any word for information) gets close
+to the known optimum of about 3.421:
+| strategy | pool | avg | max | fail% |
+| --- | --- | --- | --- | --- |
+| random | answers | 4.039 | >6 | 0.82 |
+| frequency | answers | 3.640 | >6 | 0.60 |
+| expected-size | answers | 3.623 | >6 | 0.60 |
+| minimax | answers | 3.677 | >6 | 0.65 |
+| entropy | answers | 3.598 | >6 | 0.48 |
+| entropy | all | 3.465 | 6 | 0.00 |
+| expected-size | all | 3.481 | 5 | 0.00 |
+| minimax | all | 3.573 | 6 | 0.00 |
+A concrete example of what the smart strategies buy you: solving `mound` on the curated set, the
+frequency baseline burns turns cycling through lookalikes (`slate`, `crony`, `bound`, `found`,
+`hound`, `mound`) while entropy picks a splitting guess and finishes in three (`raise`, `mulch`,
+`mound`).
+<sub>Methodology: a game is a failure if unsolved in 6 guesses (counted as 7 in the mean).
+Deterministic strategies are reproducible; `random` uses a fixed seed. Full results and per-word
+data are in [`benchmarks/results/official/`](https://github.com/adityakmehrotra/wordlesmith/tree/main/benchmarks/results/official); regenerate the primary
+run with `python scripts/run_official_benchmark.py`. The primary `answers`-pool run takes about 10
+minutes per strategy on 9 cores; the `all`-pool run scores every valid word each turn and is far
+slower per game (roughly 9-10 minutes of wall time per strategy even on the 2,315-word curated
+set), which is why it stays on the smaller curated set. Use `--sample N` for a quick estimate.</sub>
+## How it works
+Scoring: Wordle feedback is computed in two passes. Greens are assigned first and each
+consumes its letter in the target; yellows are then assigned left to right, each consuming
+a remaining occurrence. A guess letter with no occurrence left is gray. This is why the
+second `E` in `SPEED` is gray against `ABIDE`, which has only one `E`.
+Filtering: after each guess the solver keeps a word `w` only if `feedback(guess, w)`
+equals the pattern actually observed. This single rule handles every duplicate-letter case
+correctly, so there is no separate (and bug-prone) tracking of which letters are "in" or
+"out".
+Word lists: by default every valid Wordle word is treated as a possible answer. The original
+Wordle solution set was only 2,315 words, but the NYT has revised it over time, so a solver
+built on that list can dead-end on a legitimate answer it never considered (`maven`, for
+instance). Using the full valid list avoids that, at the cost of a somewhat higher average
+since there are more words to tell apart. Pass `--curated` to fall back to the original
+2,315-answer set (faster, and the numbers become comparable to published solvers).
+## Strategies
+| name | idea | good for |
+| --- | --- | --- |
+| `frequency` | Sum of per-position letter frequencies among candidates (the original baseline). | A strong, cheap heuristic. |
+| `entropy` | Maximize expected information (Shannon entropy of the feedback-bucket distribution). | Best average guess count. |
+| `expected-size` | Minimize the expected number of remaining candidates. | Simple, nearly as strong as entropy. |
+| `minimax` | Minimize the largest feedback bucket (worst case). | Smallest worst-case bucket per guess. |
+| `random` | Guess a random consistent word. | A control / baseline to beat. |
+The entropy, expected-size, and minimax strategies accept a `--guess-pool` of `answers`
+(guess from remaining candidates) or `all` (guess from the full allowed list).
+See [`docs/strategies.md`](https://github.com/adityakmehrotra/wordlesmith/blob/main/docs/strategies.md) for an in-depth explanation of each strategy:
+the scoring formulas, the bucket-splitting idea the information-theoretic strategies share
+(with a worked example), the guess-pool trade-off, and how to add your own strategy.
+## Limitations
+- **Pure Python is slow for the `all` guess pool.** Scoring every valid word each turn takes
+  minutes per benchmark, which is why the committed `all`-pool numbers stay on the curated set.
+  For a single interactive `solve`/`play` it's fine (the opening is precomputed).
+- **The word list is a snapshot.** `valid_words.txt` is the NYT valid-guess list as of mid-2025.
+  If the NYT adds words later, refresh it and regenerate the opening table.
+- **Six-guess failures are expected.** Over the full valid list even entropy fails about 9% of
+  games, because clusters like `match`/`batch`/`catch`/`hatch` or the `-ound`/`-ight` families
+  can't be separated in six turns. Those words are rarely real answers, so `--curated` is the
+  realistic daily-play figure.
+- **The strategies are greedy.** They optimize the current guess, not the whole game tree, so
+  even the best is a step behind the known optimal decision tree (about 3.421 on the curated set).
+- **English five-letter Wordle only.** No hard mode and no other word lengths (the engine assumes
+  five letters), though `--answers`/`--allowed` accept custom five-letter word lists.
+## Development
+```bash
+pip install -e ".[dev,bench]"
+pytest --cov=wordlesmith      # tests + coverage
+ruff check . && ruff format --check .
+mypy src/
+python -m build && twine check dist/*
+```
+Contributions welcome. A natural extension is adding a new strategy: implement `Strategy`,
+register it, and it shows up in `compare` automatically. Please open an
+[issue](https://github.com/adityakmehrotra/wordlesmith/issues) or PR.
+## License &amp; contact
+Distributed under the MIT License. See [`LICENSE.txt`](https://github.com/adityakmehrotra/wordlesmith/blob/main/LICENSE.txt).
+Aditya Mehrotra. Reach me at `adi1.mehrotra@gmail.com` or on
+[LinkedIn](https://www.linkedin.com/in/aditya-mehrotra-).
+<p align="right">(<a href="#readme-top">back to top</a>)</p>
+[ci-shield]: https://github.com/adityakmehrotra/wordlesmith/actions/workflows/ci.yml/badge.svg
+[ci-url]: https://github.com/adityakmehrotra/wordlesmith/actions/workflows/ci.yml
+[python-shield]: https://img.shields.io/badge/python-3.10%2B-blue
+[python-url]: https://www.python.org/downloads/
+[license-shield]: https://img.shields.io/badge/license-MIT-green
+[license-url]: https://github.com/adityakmehrotra/wordlesmith/blob/main/LICENSE.txt

wordlesmith-0.1.0/benchmarks/results/official/curated_all.csv ADDED Viewed

@@ -0,0 +1,4 @@
+strategy,guess_pool,num_words,average,median,max,fails,fail_pct,turns_1,turns_2,turns_3,turns_4,turns_5,turns_6,turns_fail,wall_seconds,seed,package_version
+expected-size,all,2315,3.4812,3,5,0,0.0000,0,55,1130,1091,39,0,0,574.09,0,0.1.0
+minimax,all,2315,3.5732,4,6,0,0.0000,1,53,990,1162,107,2,0,530.64,0,0.1.0
+entropy,all,2315,3.4648,3,6,0,0.0000,0,44,1216,991,63,1,0,530.19,0,0.1.0

wordlesmith-0.1.0/benchmarks/results/official/curated_all.md ADDED Viewed

@@ -0,0 +1,5 @@
+| strategy | pool | avg | median | max | fail% | time(s) |
+| --- | --- | --- | --- | --- | --- | --- |
+| expected-size | all | 3.481 | 3.0 | 5 | 0.00 | 574.1 |
+| minimax | all | 3.573 | 4.0 | 6 | 0.00 | 530.6 |
+| entropy | all | 3.465 | 3.0 | 6 | 0.00 | 530.2 |

wordlesmith-0.1.0/benchmarks/results/official/curated_answers.csv ADDED Viewed

@@ -0,0 +1,6 @@
+strategy,guess_pool,num_words,average,median,max,fails,fail_pct,turns_1,turns_2,turns_3,turns_4,turns_5,turns_6,turns_fail,wall_seconds,seed,package_version
+random,answers,2315,4.0393,4,7,19,0.8207,0,98,597,935,505,161,19,11.68,0,0.1.0
+frequency,answers,2315,3.6397,4,7,14,0.6048,1,146,900,975,239,40,14,2.19,0,0.1.0
+expected-size,answers,2315,3.6233,4,7,14,0.6048,1,131,957,946,224,42,14,6.59,0,0.1.0
+minimax,answers,2315,3.6773,4,7,15,0.6479,1,122,880,1009,241,47,15,6.38,0,0.1.0
+entropy,answers,2315,3.5983,4,7,11,0.4752,1,131,999,919,207,47,11,6.24,0,0.1.0