PyPI - text-summarizer-aweebtaku - Versions diffs - 1.0.1__tar.gz → 1.1.0__tar.gz - Mend

text-summarizer-aweebtaku 1.0.1.tar.gz → 1.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

{text_summarizer_aweebtaku-1.0.1/text_summarizer_aweebtaku.egg-info → text_summarizer_aweebtaku-1.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: text-summarizer-aweebtaku
-Version: 1.0.1
+Version: 1.1.0
 Summary: A text summarization tool using GloVe embeddings and PageRank algorithm
 Home-page: https://github.com/AWeebTaku/Summarizer
 Author: Your Name
@@ -22,6 +22,7 @@ Requires-Dist: numpy
 Requires-Dist: nltk
 Requires-Dist: scikit-learn
 Requires-Dist: networkx
+Requires-Dist: requests
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -75,15 +76,13 @@ pip install -e .
 ### Download GloVe Embeddings
-The tool requires GloVe word embeddings. Download the 100d version:
+**No manual download required!** The package will automatically download GloVe embeddings (100d, ~400MB) on first use and cache them in your home directory (`~/.text_summarizer/`).
-```bash
-wget http://nlp.stanford.edu/data/glove.6B.zip
-unzip glove.6B.zip
+If you prefer to use your own GloVe file, you can specify the path:
+```python
+summarizer = TextSummarizer(glove_path='path/to/your/glove.6B.100d.txt')
 ```
-Place the `glove.6B.100d.txt` file in the project root or specify the path.
 ## Usage
 ### Command Line Interface

{text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/README.md RENAMED Viewed

@@ -39,15 +39,13 @@ pip install -e .
 ### Download GloVe Embeddings
-The tool requires GloVe word embeddings. Download the 100d version:
+**No manual download required!** The package will automatically download GloVe embeddings (100d, ~400MB) on first use and cache them in your home directory (`~/.text_summarizer/`).
-```bash
-wget http://nlp.stanford.edu/data/glove.6B.zip
-unzip glove.6B.zip
+If you prefer to use your own GloVe file, you can specify the path:
+```python
+summarizer = TextSummarizer(glove_path='path/to/your/glove.6B.100d.txt')
 ```
-Place the `glove.6B.100d.txt` file in the project root or specify the path.
 ## Usage
 ### Command Line Interface

{text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/requirements.txt RENAMED Viewed

@@ -2,4 +2,5 @@ pandas
 numpy
 nltk
 scikit-learn
-networkx
+networkx
+requests

{text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/setup.py RENAMED Viewed

@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
 setup(
     name="text-summarizer-aweebtaku",
-    version="1.0.1",
+    version="1.1.0",
     author="Your Name",
     author_email="your.email@example.com",
     description="A text summarization tool using GloVe embeddings and PageRank algorithm",

{text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0}/text_summarizer/summarizer.py RENAMED Viewed

@@ -2,6 +2,9 @@ import pandas as pd
 import numpy as np
 import nltk
 import os
+import requests
+import zipfile
+from pathlib import Path
 from nltk.tokenize import sent_tokenize
 from nltk.corpus import stopwords
 from sklearn.metrics.pairwise import cosine_similarity
@@ -14,24 +17,89 @@ import networkx as nx
 class TextSummarizer:
     """A class for summarizing text documents using GloVe embeddings and PageRank."""
-    def __init__(self, glove_path='glove.6B.100d.txt/glove.6B.100d.txt', num_sentences=5):
-        self.glove_path = glove_path
+    def __init__(self, glove_path=None, num_sentences=5):
         self.num_sentences = num_sentences
         self.word_embeddings = {}
         self.stop_words = set(stopwords.words('english'))
+        # Set default GloVe path
+        if glove_path is None:
+            glove_path = self._get_default_glove_path()
+        self.glove_path = glove_path
         self._load_embeddings()
+    def _get_default_glove_path(self):
+        """Get the default path for GloVe embeddings."""
+        # Use user's home directory for data
+        home_dir = Path.home()
+        glove_dir = home_dir / '.text_summarizer'
+        glove_dir.mkdir(exist_ok=True)
+        return glove_dir / 'glove.6B.100d.txt'
+    def _download_glove_embeddings(self):
+        """Download GloVe embeddings if not present."""
+        print("GloVe embeddings not found. Downloading from Stanford NLP...")
+        # Create directory if it doesn't exist
+        glove_file = Path(self.glove_path)
+        glove_file.parent.mkdir(exist_ok=True)
+        # Download the zip file
+        url = "https://nlp.stanford.edu/data/glove.6B.zip"
+        zip_path = glove_file.parent / "glove.6B.zip"
+        try:
+            print("Downloading GloVe embeddings (862 MB)...")
+            response = requests.get(url, stream=True)
+            response.raise_for_status()
+            total_size = int(response.headers.get('content-length', 0))
+            downloaded_size = 0
+            with open(zip_path, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+                        downloaded_size += len(chunk)
+                        if total_size > 0:
+                            progress = (downloaded_size / total_size) * 100
+                            print(".1f", end='', flush=True)
+            print("\nDownload complete. Extracting...")
+            # Extract the specific file we need
+            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+                zip_ref.extract('glove.6B.100d.txt', glove_file.parent)
+            # Clean up zip file
+            zip_path.unlink()
+            print(f"GloVe embeddings extracted to {self.glove_path}")
+        except Exception as e:
+            print(f"Failed to download GloVe embeddings: {e}")
+            print("Please download manually from: https://nlp.stanford.edu/data/glove.6B.zip")
+            raise
     def _load_embeddings(self):
         """Load GloVe word embeddings from file."""
+        if not os.path.exists(self.glove_path):
+            self._download_glove_embeddings()
         try:
+            print(f"Loading GloVe embeddings from {self.glove_path}...")
             with open(self.glove_path, 'r', encoding='utf-8') as f:
                 for line in f:
                     values = line.split()
                     word = values[0]
                     coefs = np.asarray(values[1:], dtype='float32')
                     self.word_embeddings[word] = coefs
+            print(f"Loaded {len(self.word_embeddings)} word embeddings.")
         except FileNotFoundError:
             raise FileNotFoundError(f"GloVe file not found at {self.glove_path}")
+        except Exception as e:
+            raise Exception(f"Error loading GloVe embeddings: {e}")
     def load_data(self):
         """Load data interactively."""

{text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.1.0/text_summarizer_aweebtaku.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: text-summarizer-aweebtaku
-Version: 1.0.1
+Version: 1.1.0
 Summary: A text summarization tool using GloVe embeddings and PageRank algorithm
 Home-page: https://github.com/AWeebTaku/Summarizer
 Author: Your Name
@@ -22,6 +22,7 @@ Requires-Dist: numpy
 Requires-Dist: nltk
 Requires-Dist: scikit-learn
 Requires-Dist: networkx
+Requires-Dist: requests
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -75,15 +76,13 @@ pip install -e .
 ### Download GloVe Embeddings
-The tool requires GloVe word embeddings. Download the 100d version:
+**No manual download required!** The package will automatically download GloVe embeddings (100d, ~400MB) on first use and cache them in your home directory (`~/.text_summarizer/`).
-```bash
-wget http://nlp.stanford.edu/data/glove.6B.zip
-unzip glove.6B.zip
+If you prefer to use your own GloVe file, you can specify the path:
+```python
+summarizer = TextSummarizer(glove_path='path/to/your/glove.6B.100d.txt')
 ```
-Place the `glove.6B.100d.txt` file in the project root or specify the path.
 ## Usage
 ### Command Line Interface