text-summarizer-aweebtaku 1.0.1__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {text_summarizer_aweebtaku-1.0.1/text_summarizer_aweebtaku.egg-info → text_summarizer_aweebtaku-1.1.0}/PKG-INFO +6 -7
  2. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/README.md +4 -6
  3. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/requirements.txt +2 -1
  4. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/setup.py +1 -1
  5. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer/summarizer.py +70 -2
  6. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0/text_summarizer_aweebtaku.egg-info}/PKG-INFO +6 -7
  7. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer_aweebtaku.egg-info/requires.txt +1 -0
  8. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/LICENSE +0 -0
  9. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/MANIFEST.in +0 -0
  10. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/setup.cfg +0 -0
  11. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer/__init__.py +0 -0
  12. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer/cli.py +0 -0
  13. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer/data/tennis.csv +0 -0
  14. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer/ui.py +0 -0
  15. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer_aweebtaku.egg-info/SOURCES.txt +0 -0
  16. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer_aweebtaku.egg-info/dependency_links.txt +0 -0
  17. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer_aweebtaku.egg-info/entry_points.txt +0 -0
  18. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer_aweebtaku.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: text-summarizer-aweebtaku
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: A text summarization tool using GloVe embeddings and PageRank algorithm
5
5
  Home-page: https://github.com/AWeebTaku/Summarizer
6
6
  Author: Your Name
@@ -22,6 +22,7 @@ Requires-Dist: numpy
22
22
  Requires-Dist: nltk
23
23
  Requires-Dist: scikit-learn
24
24
  Requires-Dist: networkx
25
+ Requires-Dist: requests
25
26
  Dynamic: author
26
27
  Dynamic: author-email
27
28
  Dynamic: classifier
@@ -75,15 +76,13 @@ pip install -e .
75
76
 
76
77
  ### Download GloVe Embeddings
77
78
 
78
- The tool requires GloVe word embeddings. Download the 100d version:
79
+ **No manual download required!** The package will automatically download GloVe embeddings (100d, ~400MB) on first use and cache them in your home directory (`~/.text_summarizer/`).
79
80
 
80
- ```bash
81
- wget http://nlp.stanford.edu/data/glove.6B.zip
82
- unzip glove.6B.zip
81
+ If you prefer to use your own GloVe file, you can specify the path:
82
+ ```python
83
+ summarizer = TextSummarizer(glove_path='path/to/your/glove.6B.100d.txt')
83
84
  ```
84
85
 
85
- Place the `glove.6B.100d.txt` file in the project root or specify the path.
86
-
87
86
  ## Usage
88
87
 
89
88
  ### Command Line Interface
@@ -39,15 +39,13 @@ pip install -e .
39
39
 
40
40
  ### Download GloVe Embeddings
41
41
 
42
- The tool requires GloVe word embeddings. Download the 100d version:
42
+ **No manual download required!** The package will automatically download GloVe embeddings (100d, ~400MB) on first use and cache them in your home directory (`~/.text_summarizer/`).
43
43
 
44
- ```bash
45
- wget http://nlp.stanford.edu/data/glove.6B.zip
46
- unzip glove.6B.zip
44
+ If you prefer to use your own GloVe file, you can specify the path:
45
+ ```python
46
+ summarizer = TextSummarizer(glove_path='path/to/your/glove.6B.100d.txt')
47
47
  ```
48
48
 
49
- Place the `glove.6B.100d.txt` file in the project root or specify the path.
50
-
51
49
  ## Usage
52
50
 
53
51
  ### Command Line Interface
@@ -2,4 +2,5 @@ pandas
2
2
  numpy
3
3
  nltk
4
4
  scikit-learn
5
- networkx
5
+ networkx
6
+ requests
@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
8
8
 
9
9
  setup(
10
10
  name="text-summarizer-aweebtaku",
11
- version="1.0.1",
11
+ version="1.1.0",
12
12
  author="Your Name",
13
13
  author_email="your.email@example.com",
14
14
  description="A text summarization tool using GloVe embeddings and PageRank algorithm",
@@ -2,6 +2,9 @@ import pandas as pd
2
2
  import numpy as np
3
3
  import nltk
4
4
  import os
5
+ import requests
6
+ import zipfile
7
+ from pathlib import Path
5
8
  from nltk.tokenize import sent_tokenize
6
9
  from nltk.corpus import stopwords
7
10
  from sklearn.metrics.pairwise import cosine_similarity
@@ -14,24 +17,89 @@ import networkx as nx
14
17
  class TextSummarizer:
15
18
  """A class for summarizing text documents using GloVe embeddings and PageRank."""
16
19
 
17
- def __init__(self, glove_path='glove.6B.100d.txt/glove.6B.100d.txt', num_sentences=5):
18
- self.glove_path = glove_path
20
+ def __init__(self, glove_path=None, num_sentences=5):
19
21
  self.num_sentences = num_sentences
20
22
  self.word_embeddings = {}
21
23
  self.stop_words = set(stopwords.words('english'))
24
+
25
+ # Set default GloVe path
26
+ if glove_path is None:
27
+ glove_path = self._get_default_glove_path()
28
+
29
+ self.glove_path = glove_path
22
30
  self._load_embeddings()
23
31
 
32
+ def _get_default_glove_path(self):
33
+ """Get the default path for GloVe embeddings."""
34
+ # Use user's home directory for data
35
+ home_dir = Path.home()
36
+ glove_dir = home_dir / '.text_summarizer'
37
+ glove_dir.mkdir(exist_ok=True)
38
+ return glove_dir / 'glove.6B.100d.txt'
39
+
40
+ def _download_glove_embeddings(self):
41
+ """Download GloVe embeddings if not present."""
42
+ print("GloVe embeddings not found. Downloading from Stanford NLP...")
43
+
44
+ # Create directory if it doesn't exist
45
+ glove_file = Path(self.glove_path)
46
+ glove_file.parent.mkdir(exist_ok=True)
47
+
48
+ # Download the zip file
49
+ url = "https://nlp.stanford.edu/data/glove.6B.zip"
50
+ zip_path = glove_file.parent / "glove.6B.zip"
51
+
52
+ try:
53
+ print("Downloading GloVe embeddings (862 MB)...")
54
+ response = requests.get(url, stream=True)
55
+ response.raise_for_status()
56
+
57
+ total_size = int(response.headers.get('content-length', 0))
58
+ downloaded_size = 0
59
+
60
+ with open(zip_path, 'wb') as f:
61
+ for chunk in response.iter_content(chunk_size=8192):
62
+ if chunk:
63
+ f.write(chunk)
64
+ downloaded_size += len(chunk)
65
+ if total_size > 0:
66
+ progress = (downloaded_size / total_size) * 100
67
+ print(".1f", end='', flush=True)
68
+
69
+ print("\nDownload complete. Extracting...")
70
+
71
+ # Extract the specific file we need
72
+ with zipfile.ZipFile(zip_path, 'r') as zip_ref:
73
+ zip_ref.extract('glove.6B.100d.txt', glove_file.parent)
74
+
75
+ # Clean up zip file
76
+ zip_path.unlink()
77
+
78
+ print(f"GloVe embeddings extracted to {self.glove_path}")
79
+
80
+ except Exception as e:
81
+ print(f"Failed to download GloVe embeddings: {e}")
82
+ print("Please download manually from: https://nlp.stanford.edu/data/glove.6B.zip")
83
+ raise
84
+
24
85
  def _load_embeddings(self):
25
86
  """Load GloVe word embeddings from file."""
87
+ if not os.path.exists(self.glove_path):
88
+ self._download_glove_embeddings()
89
+
26
90
  try:
91
+ print(f"Loading GloVe embeddings from {self.glove_path}...")
27
92
  with open(self.glove_path, 'r', encoding='utf-8') as f:
28
93
  for line in f:
29
94
  values = line.split()
30
95
  word = values[0]
31
96
  coefs = np.asarray(values[1:], dtype='float32')
32
97
  self.word_embeddings[word] = coefs
98
+ print(f"Loaded {len(self.word_embeddings)} word embeddings.")
33
99
  except FileNotFoundError:
34
100
  raise FileNotFoundError(f"GloVe file not found at {self.glove_path}")
101
+ except Exception as e:
102
+ raise Exception(f"Error loading GloVe embeddings: {e}")
35
103
 
36
104
  def load_data(self):
37
105
  """Load data interactively."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: text-summarizer-aweebtaku
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: A text summarization tool using GloVe embeddings and PageRank algorithm
5
5
  Home-page: https://github.com/AWeebTaku/Summarizer
6
6
  Author: Your Name
@@ -22,6 +22,7 @@ Requires-Dist: numpy
22
22
  Requires-Dist: nltk
23
23
  Requires-Dist: scikit-learn
24
24
  Requires-Dist: networkx
25
+ Requires-Dist: requests
25
26
  Dynamic: author
26
27
  Dynamic: author-email
27
28
  Dynamic: classifier
@@ -75,15 +76,13 @@ pip install -e .
75
76
 
76
77
  ### Download GloVe Embeddings
77
78
 
78
- The tool requires GloVe word embeddings. Download the 100d version:
79
+ **No manual download required!** The package will automatically download GloVe embeddings (100d, ~400MB) on first use and cache them in your home directory (`~/.text_summarizer/`).
79
80
 
80
- ```bash
81
- wget http://nlp.stanford.edu/data/glove.6B.zip
82
- unzip glove.6B.zip
81
+ If you prefer to use your own GloVe file, you can specify the path:
82
+ ```python
83
+ summarizer = TextSummarizer(glove_path='path/to/your/glove.6B.100d.txt')
83
84
  ```
84
85
 
85
- Place the `glove.6B.100d.txt` file in the project root or specify the path.
86
-
87
86
  ## Usage
88
87
 
89
88
  ### Command Line Interface