text-summarizer-aweebtaku 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- text_summarizer/cli.py +7 -0
- text_summarizer/summarizer.py +10 -1
- {text_summarizer_aweebtaku-1.0.0.dist-info → text_summarizer_aweebtaku-1.0.2.dist-info}/METADATA +10 -13
- text_summarizer_aweebtaku-1.0.2.dist-info/RECORD +11 -0
- {text_summarizer_aweebtaku-1.0.0.dist-info → text_summarizer_aweebtaku-1.0.2.dist-info}/entry_points.txt +1 -0
- text_summarizer_aweebtaku-1.0.0.dist-info/RECORD +0 -11
- {text_summarizer_aweebtaku-1.0.0.dist-info → text_summarizer_aweebtaku-1.0.2.dist-info}/WHEEL +0 -0
- {text_summarizer_aweebtaku-1.0.0.dist-info → text_summarizer_aweebtaku-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {text_summarizer_aweebtaku-1.0.0.dist-info → text_summarizer_aweebtaku-1.0.2.dist-info}/top_level.txt +0 -0
text_summarizer/cli.py
CHANGED
|
@@ -9,9 +9,16 @@ def main():
|
|
|
9
9
|
help="Number of sentences in summary")
|
|
10
10
|
parser.add_argument("--csv-file", help="Path to CSV file with articles")
|
|
11
11
|
parser.add_argument("--article-id", type=int, help="Article ID to summarize (if CSV provided)")
|
|
12
|
+
parser.add_argument("--gui", action="store_true", help="Launch graphical user interface")
|
|
12
13
|
|
|
13
14
|
args = parser.parse_args()
|
|
14
15
|
|
|
16
|
+
if args.gui:
|
|
17
|
+
# Import and run GUI
|
|
18
|
+
from .ui import main as gui_main
|
|
19
|
+
gui_main()
|
|
20
|
+
return
|
|
21
|
+
|
|
15
22
|
try:
|
|
16
23
|
summarizer = TextSummarizer(glove_path=args.glove_path, num_sentences=args.num_sentences)
|
|
17
24
|
|
text_summarizer/summarizer.py
CHANGED
|
@@ -6,6 +6,7 @@ from nltk.tokenize import sent_tokenize
|
|
|
6
6
|
from nltk.corpus import stopwords
|
|
7
7
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
8
8
|
import networkx as nx
|
|
9
|
+
import pkg_resources
|
|
9
10
|
|
|
10
11
|
# Download necessary NLTK data
|
|
11
12
|
# nltk.download('punkt_tab')
|
|
@@ -14,7 +15,15 @@ import networkx as nx
|
|
|
14
15
|
class TextSummarizer:
|
|
15
16
|
"""A class for summarizing text documents using GloVe embeddings and PageRank."""
|
|
16
17
|
|
|
17
|
-
def __init__(self, glove_path=
|
|
18
|
+
def __init__(self, glove_path=None, num_sentences=5):
|
|
19
|
+
if glove_path is None:
|
|
20
|
+
# Try to find GloVe file in package data
|
|
21
|
+
try:
|
|
22
|
+
glove_path = pkg_resources.resource_filename('text_summarizer', 'glove.6B.100d.txt/glove.6B.100d.txt')
|
|
23
|
+
except (FileNotFoundError, ModuleNotFoundError):
|
|
24
|
+
# Fallback to default path
|
|
25
|
+
glove_path = 'glove.6B.100d.txt/glove.6B.100d.txt'
|
|
26
|
+
|
|
18
27
|
self.glove_path = glove_path
|
|
19
28
|
self.num_sentences = num_sentences
|
|
20
29
|
self.word_embeddings = {}
|
{text_summarizer_aweebtaku-1.0.0.dist-info → text_summarizer_aweebtaku-1.0.2.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: text-summarizer-aweebtaku
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: A text summarization tool using GloVe embeddings and PageRank algorithm
|
|
5
5
|
Home-page: https://github.com/AWeebTaku/Summarizer
|
|
6
6
|
Author: Your Name
|
|
@@ -60,6 +60,8 @@ A Python-based text summarization tool that uses GloVe word embeddings and PageR
|
|
|
60
60
|
pip install text-summarizer-aweebtaku
|
|
61
61
|
```
|
|
62
62
|
|
|
63
|
+
**Note:** This package includes the GloVe word embeddings file (~400MB), so the installation may take some time.
|
|
64
|
+
|
|
63
65
|
### Install from Source
|
|
64
66
|
|
|
65
67
|
1. Clone the repository:
|
|
@@ -73,16 +75,7 @@ cd Summarizer
|
|
|
73
75
|
pip install -e .
|
|
74
76
|
```
|
|
75
77
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
The tool requires GloVe word embeddings. Download the 100d version:
|
|
79
|
-
|
|
80
|
-
```bash
|
|
81
|
-
wget http://nlp.stanford.edu/data/glove.6B.zip
|
|
82
|
-
unzip glove.6B.zip
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
Place the `glove.6B.100d.txt` file in the project root or specify the path.
|
|
78
|
+
**Note:** The GloVe word embeddings are included in the package, so no additional download is required.
|
|
86
79
|
|
|
87
80
|
## Usage
|
|
88
81
|
|
|
@@ -96,10 +89,14 @@ text-summarizer-aweebtaku --csv-file data/tennis.csv --article-id 1
|
|
|
96
89
|
text-summarizer-aweebtaku
|
|
97
90
|
```
|
|
98
91
|
|
|
99
|
-
###
|
|
92
|
+
### Graphical User Interface
|
|
100
93
|
|
|
101
94
|
```bash
|
|
102
|
-
|
|
95
|
+
# Launch GUI (easiest way)
|
|
96
|
+
text-summarizer-aweebtaku --gui
|
|
97
|
+
|
|
98
|
+
# Or use the dedicated GUI command
|
|
99
|
+
text-summarizer-gui
|
|
103
100
|
```
|
|
104
101
|
|
|
105
102
|
### Python API
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
text_summarizer/__init__.py,sha256=juqSmwYQLqoiZpyLfxE1sJKoYLNAe_-a3_LOIUV6J6g,63
|
|
2
|
+
text_summarizer/cli.py,sha256=rWbSpT1gJ8kVcsTQ-ov6AZkfy5uUz2taAXeSnDEy0Zw,3773
|
|
3
|
+
text_summarizer/summarizer.py,sha256=NLc6OzZ6GhUbVpnm9OrT67lNG1wFRgVWmMdiEbAWJ5g,9639
|
|
4
|
+
text_summarizer/ui.py,sha256=Ky40zcr-_0zh5I7Kh4Bc8hKrEBdOALe5G4i3ukDJWts,16638
|
|
5
|
+
text_summarizer/data/tennis.csv,sha256=oEPZr4Dy6cmCDtdQ2QYJyJpERzQseuNJ53JP2XyIfBk,12943
|
|
6
|
+
text_summarizer_aweebtaku-1.0.2.dist-info/licenses/LICENSE,sha256=q53YqEH5OACuJ8YmE3i9pND509hapVaOX42ix2AMkZ8,1085
|
|
7
|
+
text_summarizer_aweebtaku-1.0.2.dist-info/METADATA,sha256=caJbPV_mGdI6K6KyzZuiKg_9LLx67DDuDa1FIjZKif0,4859
|
|
8
|
+
text_summarizer_aweebtaku-1.0.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
9
|
+
text_summarizer_aweebtaku-1.0.2.dist-info/entry_points.txt,sha256=a8n647pYmETd5RzGoOBcfYtIxxNFNu7P5zctmhpldNY,117
|
|
10
|
+
text_summarizer_aweebtaku-1.0.2.dist-info/top_level.txt,sha256=2s-4Uyii86k2iEeiIi0JghAXW47cEQ8qM_ONYPs9Gh8,16
|
|
11
|
+
text_summarizer_aweebtaku-1.0.2.dist-info/RECORD,,
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
text_summarizer/__init__.py,sha256=juqSmwYQLqoiZpyLfxE1sJKoYLNAe_-a3_LOIUV6J6g,63
|
|
2
|
-
text_summarizer/cli.py,sha256=ZwpSJTbAQvXLDhSdbKWE054O4ttLj60X7GBuWowitLU,3550
|
|
3
|
-
text_summarizer/summarizer.py,sha256=ctQlZSjP9jkgM9ZZ7yTtjqG60xr9CRnUWciCeSpLujo,9242
|
|
4
|
-
text_summarizer/ui.py,sha256=Ky40zcr-_0zh5I7Kh4Bc8hKrEBdOALe5G4i3ukDJWts,16638
|
|
5
|
-
text_summarizer/data/tennis.csv,sha256=oEPZr4Dy6cmCDtdQ2QYJyJpERzQseuNJ53JP2XyIfBk,12943
|
|
6
|
-
text_summarizer_aweebtaku-1.0.0.dist-info/licenses/LICENSE,sha256=q53YqEH5OACuJ8YmE3i9pND509hapVaOX42ix2AMkZ8,1085
|
|
7
|
-
text_summarizer_aweebtaku-1.0.0.dist-info/METADATA,sha256=bzPTGyKfPT54u4BNlDRmgyrI5f3ngUZBLL3CHE3l0Co,4802
|
|
8
|
-
text_summarizer_aweebtaku-1.0.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
9
|
-
text_summarizer_aweebtaku-1.0.0.dist-info/entry_points.txt,sha256=5PYNpbprDgkQtQUFzv5f_MW5OXI5GZu_zKqhGFoh_2o,71
|
|
10
|
-
text_summarizer_aweebtaku-1.0.0.dist-info/top_level.txt,sha256=2s-4Uyii86k2iEeiIi0JghAXW47cEQ8qM_ONYPs9Gh8,16
|
|
11
|
-
text_summarizer_aweebtaku-1.0.0.dist-info/RECORD,,
|
{text_summarizer_aweebtaku-1.0.0.dist-info → text_summarizer_aweebtaku-1.0.2.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|