PyPI - text-summarizer-aweebtaku - Versions diffs - 1.0.1__tar.gz → 1.0.2__tar.gz - Mend

text-summarizer-aweebtaku 1.0.1.tar.gz → 1.0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

{text_summarizer_aweebtaku-1.0.1/text_summarizer_aweebtaku.egg-info → text_summarizer_aweebtaku-1.0.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: text-summarizer-aweebtaku
-Version: 1.0.1
+Version: 1.0.2
 Summary: A text summarization tool using GloVe embeddings and PageRank algorithm
 Home-page: https://github.com/AWeebTaku/Summarizer
 Author: Your Name
@@ -60,6 +60,8 @@ A Python-based text summarization tool that uses GloVe word embeddings and PageR
 pip install text-summarizer-aweebtaku
 ```
+**Note:** This package includes the GloVe word embeddings file (~400MB), so the installation may take some time.
 ### Install from Source
 1. Clone the repository:
@@ -73,16 +75,7 @@ cd Summarizer
 pip install -e .
 ```
-### Download GloVe Embeddings
-The tool requires GloVe word embeddings. Download the 100d version:
-```bash
-wget http://nlp.stanford.edu/data/glove.6B.zip
-unzip glove.6B.zip
-```
-Place the `glove.6B.100d.txt` file in the project root or specify the path.
+**Note:** The GloVe word embeddings are included in the package, so no additional download is required.
 ## Usage

{text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/README.md RENAMED Viewed

@@ -24,6 +24,8 @@ A Python-based text summarization tool that uses GloVe word embeddings and PageR
 pip install text-summarizer-aweebtaku
 ```
+**Note:** This package includes the GloVe word embeddings file (~400MB), so the installation may take some time.
 ### Install from Source
 1. Clone the repository:
@@ -37,16 +39,7 @@ cd Summarizer
 pip install -e .
 ```
-### Download GloVe Embeddings
-The tool requires GloVe word embeddings. Download the 100d version:
-```bash
-wget http://nlp.stanford.edu/data/glove.6B.zip
-unzip glove.6B.zip
-```
-Place the `glove.6B.100d.txt` file in the project root or specify the path.
+**Note:** The GloVe word embeddings are included in the package, so no additional download is required.
 ## Usage

{text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/setup.py RENAMED Viewed

@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
 setup(
     name="text-summarizer-aweebtaku",
-    version="1.0.1",
+    version="1.0.2",
     author="Your Name",
     author_email="your.email@example.com",
     description="A text summarization tool using GloVe embeddings and PageRank algorithm",
@@ -37,6 +37,6 @@ setup(
     },
     include_package_data=True,
     package_data={
-        "textsummarizer": ["data/*.csv"],
+        "text_summarizer": ["data/*.csv"],
     },
 )

{text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer/summarizer.py RENAMED Viewed

@@ -6,6 +6,10 @@ from nltk.tokenize import sent_tokenize
 from nltk.corpus import stopwords
 from sklearn.metrics.pairwise import cosine_similarity
 import networkx as nx
+import pkg_resources
+import urllib.request
+import zipfile
+import shutil
 # Download necessary NLTK data
 # nltk.download('punkt_tab')
@@ -14,13 +18,58 @@ import networkx as nx
 class TextSummarizer:
     """A class for summarizing text documents using GloVe embeddings and PageRank."""
-    def __init__(self, glove_path='glove.6B.100d.txt/glove.6B.100d.txt', num_sentences=5):
+    def __init__(self, glove_path=None, num_sentences=5):
+        if glove_path is None:
+            # Try to find GloVe file in package data first
+            try:
+                glove_path = pkg_resources.resource_filename('text_summarizer', 'glove.6B.100d.txt/glove.6B.100d.txt')
+            except (FileNotFoundError, ModuleNotFoundError):
+                # Fallback to default path
+                glove_path = 'glove.6B.100d.txt/glove.6B.100d.txt'
+        # Download GloVe if it doesn't exist
+        if not os.path.exists(glove_path):
+            print("GloVe embeddings not found. Downloading...")
+            self._download_glove()
         self.glove_path = glove_path
         self.num_sentences = num_sentences
         self.word_embeddings = {}
         self.stop_words = set(stopwords.words('english'))
         self._load_embeddings()
+    def _download_glove(self):
+        """Download and extract GloVe embeddings."""
+        glove_dir = 'glove.6B.100d.txt'
+        glove_file = os.path.join(glove_dir, 'glove.6B.100d.txt')
+        zip_url = 'http://nlp.stanford.edu/data/glove.6B.zip'
+        zip_path = 'glove.6B.zip'
+        try:
+            # Download the zip file
+            print(f"Downloading GloVe from {zip_url}...")
+            urllib.request.urlretrieve(zip_url, zip_path)
+            # Extract the specific file we need
+            print("Extracting GloVe embeddings...")
+            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+                # Create directory if it doesn't exist
+                os.makedirs(glove_dir, exist_ok=True)
+                # Extract only the 100d file
+                zip_ref.extract('glove.6B.100d.txt', '.')
+                # Move to the expected directory
+                if os.path.exists('glove.6B.100d.txt'):
+                    shutil.move('glove.6B.100d.txt', glove_file)
+            # Clean up
+            os.remove(zip_path)
+            print("GloVe embeddings downloaded successfully!")
+        except Exception as e:
+            print(f"Failed to download GloVe embeddings: {e}")
+            print("Please download manually from: http://nlp.stanford.edu/data/glove.6B.zip")
+            raise
     def _load_embeddings(self):
         """Load GloVe word embeddings from file."""
         try:

{text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2/text_summarizer_aweebtaku.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: text-summarizer-aweebtaku
-Version: 1.0.1
+Version: 1.0.2
 Summary: A text summarization tool using GloVe embeddings and PageRank algorithm
 Home-page: https://github.com/AWeebTaku/Summarizer
 Author: Your Name
@@ -60,6 +60,8 @@ A Python-based text summarization tool that uses GloVe word embeddings and PageR
 pip install text-summarizer-aweebtaku
 ```
+**Note:** This package includes the GloVe word embeddings file (~400MB), so the installation may take some time.
 ### Install from Source
 1. Clone the repository:
@@ -73,16 +75,7 @@ cd Summarizer
 pip install -e .
 ```
-### Download GloVe Embeddings
-The tool requires GloVe word embeddings. Download the 100d version:
-```bash
-wget http://nlp.stanford.edu/data/glove.6B.zip
-unzip glove.6B.zip
-```
-Place the `glove.6B.100d.txt` file in the project root or specify the path.
+**Note:** The GloVe word embeddings are included in the package, so no additional download is required.
 ## Usage