textstat-py 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- textstat_py-0.1.0/PKG-INFO +99 -0
- textstat_py-0.1.0/README.md +75 -0
- textstat_py-0.1.0/pyproject.toml +41 -0
- textstat_py-0.1.0/setup.cfg +4 -0
- textstat_py-0.1.0/tests/test_textstat.py +1069 -0
- textstat_py-0.1.0/textstat_py.egg-info/PKG-INFO +99 -0
- textstat_py-0.1.0/textstat_py.egg-info/SOURCES.txt +8 -0
- textstat_py-0.1.0/textstat_py.egg-info/dependency_links.txt +1 -0
- textstat_py-0.1.0/textstat_py.egg-info/entry_points.txt +2 -0
- textstat_py-0.1.0/textstat_py.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: textstat-py
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pure-Python text analysis: readability, vocabulary richness, sentiment, n-grams
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/Gnomecromancer/textstat-py
|
|
7
|
+
Project-URL: Repository, https://github.com/Gnomecromancer/textstat-py
|
|
8
|
+
Project-URL: Issues, https://github.com/Gnomecromancer/textstat-py/issues
|
|
9
|
+
Keywords: text,nlp,readability,flesch,gunning-fog,sentiment,vocabulary,text-analysis,linguistics
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
22
|
+
Requires-Python: >=3.8
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# textstat
|
|
26
|
+
|
|
27
|
+
Text analysis for Python. Readability scores, vocabulary stats, sentiment, n-grams — no dependencies.
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
pip install textstat-py
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from textstat import analyze, flesch_reading_ease, grade_level_consensus
|
|
37
|
+
|
|
38
|
+
text = open("essay.txt").read()
|
|
39
|
+
|
|
40
|
+
print(flesch_reading_ease(text)) # 68.4
|
|
41
|
+
print(grade_level_consensus(text)) # 9.2
|
|
42
|
+
|
|
43
|
+
stats = analyze(text)
|
|
44
|
+
# stats is a flat dict with everything:
|
|
45
|
+
# reading_time_min, sentiment_label, vocabulary_richness, sentence_stats, ...
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## CLI
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
textstat document.txt
|
|
52
|
+
cat file.txt | textstat
|
|
53
|
+
textstat --json report.txt
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Functions
|
|
57
|
+
|
|
58
|
+
**Readability**
|
|
59
|
+
- `flesch_reading_ease(text)` — score on a 0–100 scale; higher is easier to read
|
|
60
|
+
- `flesch_kincaid_grade(text)` — US grade level
|
|
61
|
+
- `gunning_fog(text)` — years of education
|
|
62
|
+
- `coleman_liau_index(text)` — US grade level
|
|
63
|
+
- `automated_readability_index(text)` — US grade level
|
|
64
|
+
- `smog_index(text)` — years of education
|
|
65
|
+
- `grade_level_consensus(text)` — average across all grade metrics
|
|
66
|
+
|
|
67
|
+
**Vocabulary**
|
|
68
|
+
- `lexical_diversity(text)` — type-token ratio
|
|
69
|
+
- `mattr(text, window=100)` — moving-average type-token ratio (MATTR) over a sliding window of `window` words
|
|
70
|
+
- `herdan_c(text)`, `yule_k(text)`
|
|
71
|
+
- `hapax_legomena_ratio(text)` — fraction of words appearing once
|
|
72
|
+
- `vocabulary_richness(text)` — all of the above as a dict
|
|
73
|
+
|
|
74
|
+
**Counts & structure**
|
|
75
|
+
- `count_words(text)`, `count_sentences(text)`, `count_paragraphs(text)`
|
|
76
|
+
- `reading_time(text, wpm=200)`
|
|
77
|
+
- `sentence_stats(text)`, `paragraph_stats(text)`
|
|
78
|
+
|
|
79
|
+
**Sentiment**
|
|
80
|
+
- `sentiment_polarity(text)` — −1 to +1
|
|
81
|
+
- `sentiment_label(text)` — "positive" / "neutral" / "negative"
|
|
82
|
+
|
|
83
|
+
**N-grams**
|
|
84
|
+
- `top_ngrams(text, n=2, k=10)`
|
|
85
|
+
- `ngram_diversity(text, n=2)`
|
|
86
|
+
- `ngram_stats(text)`
|
|
87
|
+
|
|
88
|
+
**Misc**
|
|
89
|
+
- `top_words(text, n=10)`
|
|
90
|
+
- `word_frequency_distribution(text)`
|
|
91
|
+
- `text_density(text)`
|
|
92
|
+
|
|
93
|
+
## Requirements
|
|
94
|
+
|
|
95
|
+
Python 3.8+
|
|
96
|
+
|
|
97
|
+
## License
|
|
98
|
+
|
|
99
|
+
MIT
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# textstat
|
|
2
|
+
|
|
3
|
+
Text analysis for Python. Readability scores, vocabulary stats, sentiment, n-grams — no dependencies.
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
pip install textstat-py
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## Usage
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from textstat import analyze, flesch_reading_ease, grade_level_consensus
|
|
13
|
+
|
|
14
|
+
text = open("essay.txt").read()
|
|
15
|
+
|
|
16
|
+
print(flesch_reading_ease(text)) # 68.4
|
|
17
|
+
print(grade_level_consensus(text)) # 9.2
|
|
18
|
+
|
|
19
|
+
stats = analyze(text)
|
|
20
|
+
# stats is a flat dict with everything:
|
|
21
|
+
# reading_time_min, sentiment_label, vocabulary_richness, sentence_stats, ...
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## CLI
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
textstat document.txt
|
|
28
|
+
cat file.txt | textstat
|
|
29
|
+
textstat --json report.txt
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Functions
|
|
33
|
+
|
|
34
|
+
**Readability**
|
|
35
|
+
- `flesch_reading_ease(text)` — score on a 0–100 scale; higher is easier to read
|
|
36
|
+
- `flesch_kincaid_grade(text)` — US grade level
|
|
37
|
+
- `gunning_fog(text)` — years of education
|
|
38
|
+
- `coleman_liau_index(text)` — US grade level
|
|
39
|
+
- `automated_readability_index(text)` — US grade level
|
|
40
|
+
- `smog_index(text)` — years of education
|
|
41
|
+
- `grade_level_consensus(text)` — average across all grade metrics
|
|
42
|
+
|
|
43
|
+
**Vocabulary**
|
|
44
|
+
- `lexical_diversity(text)` — type-token ratio
|
|
45
|
+
- `mattr(text, window=100)` — moving-average type-token ratio (MATTR) over a sliding window of `window` words
|
|
46
|
+
- `herdan_c(text)`, `yule_k(text)`
|
|
47
|
+
- `hapax_legomena_ratio(text)` — fraction of words appearing once
|
|
48
|
+
- `vocabulary_richness(text)` — all of the above as a dict
|
|
49
|
+
|
|
50
|
+
**Counts & structure**
|
|
51
|
+
- `count_words(text)`, `count_sentences(text)`, `count_paragraphs(text)`
|
|
52
|
+
- `reading_time(text, wpm=200)`
|
|
53
|
+
- `sentence_stats(text)`, `paragraph_stats(text)`
|
|
54
|
+
|
|
55
|
+
**Sentiment**
|
|
56
|
+
- `sentiment_polarity(text)` — −1 to +1
|
|
57
|
+
- `sentiment_label(text)` — "positive" / "neutral" / "negative"
|
|
58
|
+
|
|
59
|
+
**N-grams**
|
|
60
|
+
- `top_ngrams(text, n=2, k=10)`
|
|
61
|
+
- `ngram_diversity(text, n=2)`
|
|
62
|
+
- `ngram_stats(text)`
|
|
63
|
+
|
|
64
|
+
**Misc**
|
|
65
|
+
- `top_words(text, n=10)`
|
|
66
|
+
- `word_frequency_distribution(text)`
|
|
67
|
+
- `text_density(text)`
|
|
68
|
+
|
|
69
|
+
## Requirements
|
|
70
|
+
|
|
71
|
+
Python 3.8+
|
|
72
|
+
|
|
73
|
+
## License
|
|
74
|
+
|
|
75
|
+
MIT
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "textstat-py"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Pure-Python text analysis: readability, vocabulary richness, sentiment, n-grams"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
|
+
keywords = [
|
|
13
|
+
"text", "nlp", "readability", "flesch", "gunning-fog",
|
|
14
|
+
"sentiment", "vocabulary", "text-analysis", "linguistics"
|
|
15
|
+
]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Intended Audience :: Science/Research",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.8",
|
|
23
|
+
"Programming Language :: Python :: 3.9",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Topic :: Text Processing :: Linguistic",
|
|
28
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.scripts]
|
|
32
|
+
textstat = "textstat:main"
|
|
33
|
+
|
|
34
|
+
[project.urls]
|
|
35
|
+
Homepage = "https://github.com/Gnomecromancer/textstat-py"
|
|
36
|
+
Repository = "https://github.com/Gnomecromancer/textstat-py"
|
|
37
|
+
Issues = "https://github.com/Gnomecromancer/textstat-py/issues"
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.packages.find]
|
|
40
|
+
where = ["."]
|
|
41
|
+
include = ["textstat*"]
|