text-summarizer-aweebtaku 1.0.1__tar.gz → 1.0.2__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (18)
  1. {text_summarizer_aweebtaku-1.0.1/text_summarizer_aweebtaku.egg-info → text_summarizer_aweebtaku-1.0.2}/PKG-INFO +4 -11
  2. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/README.md +3 -10
  3. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/setup.py +2 -2
  4. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer/summarizer.py +50 -1
  5. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2/text_summarizer_aweebtaku.egg-info}/PKG-INFO +4 -11
  6. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/LICENSE +0 -0
  7. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/MANIFEST.in +0 -0
  8. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/requirements.txt +0 -0
  9. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/setup.cfg +0 -0
  10. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer/__init__.py +0 -0
  11. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer/cli.py +0 -0
  12. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer/data/tennis.csv +0 -0
  13. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer/ui.py +0 -0
  14. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer_aweebtaku.egg-info/SOURCES.txt +0 -0
  15. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer_aweebtaku.egg-info/dependency_links.txt +0 -0
  16. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer_aweebtaku.egg-info/entry_points.txt +0 -0
  17. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer_aweebtaku.egg-info/requires.txt +0 -0
  18. {text_summarizer_aweebtaku-1.0.1 → text_summarizer_aweebtaku-1.0.2}/text_summarizer_aweebtaku.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: text-summarizer-aweebtaku
3
- Version: 1.0.1
3
+ Version: 1.0.2
4
4
  Summary: A text summarization tool using GloVe embeddings and PageRank algorithm
5
5
  Home-page: https://github.com/AWeebTaku/Summarizer
6
6
  Author: Your Name
@@ -60,6 +60,8 @@ A Python-based text summarization tool that uses GloVe word embeddings and PageR
60
60
  pip install text-summarizer-aweebtaku
61
61
  ```
62
62
 
63
+ **Note:** This package includes the GloVe word embeddings file (~400MB), so the installation may take some time.
64
+
63
65
  ### Install from Source
64
66
 
65
67
  1. Clone the repository:
@@ -73,16 +75,7 @@ cd Summarizer
73
75
  pip install -e .
74
76
  ```
75
77
 
76
- ### Download GloVe Embeddings
77
-
78
- The tool requires GloVe word embeddings. Download the 100d version:
79
-
80
- ```bash
81
- wget http://nlp.stanford.edu/data/glove.6B.zip
82
- unzip glove.6B.zip
83
- ```
84
-
85
- Place the `glove.6B.100d.txt` file in the project root or specify the path.
78
+ **Note:** The GloVe word embeddings are included in the package, so no additional download is required.
86
79
 
87
80
  ## Usage
88
81
 
@@ -24,6 +24,8 @@ A Python-based text summarization tool that uses GloVe word embeddings and PageR
24
24
  pip install text-summarizer-aweebtaku
25
25
  ```
26
26
 
27
+ **Note:** This package includes the GloVe word embeddings file (~400MB), so the installation may take some time.
28
+
27
29
  ### Install from Source
28
30
 
29
31
  1. Clone the repository:
@@ -37,16 +39,7 @@ cd Summarizer
37
39
  pip install -e .
38
40
  ```
39
41
 
40
- ### Download GloVe Embeddings
41
-
42
- The tool requires GloVe word embeddings. Download the 100d version:
43
-
44
- ```bash
45
- wget http://nlp.stanford.edu/data/glove.6B.zip
46
- unzip glove.6B.zip
47
- ```
48
-
49
- Place the `glove.6B.100d.txt` file in the project root or specify the path.
42
+ **Note:** The GloVe word embeddings are included in the package, so no additional download is required.
50
43
 
51
44
  ## Usage
52
45
 
@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as fh:
8
8
 
9
9
  setup(
10
10
  name="text-summarizer-aweebtaku",
11
- version="1.0.1",
11
+ version="1.0.2",
12
12
  author="Your Name",
13
13
  author_email="your.email@example.com",
14
14
  description="A text summarization tool using GloVe embeddings and PageRank algorithm",
@@ -37,6 +37,6 @@ setup(
37
37
  },
38
38
  include_package_data=True,
39
39
  package_data={
40
- "textsummarizer": ["data/*.csv"],
40
+ "text_summarizer": ["data/*.csv"],
41
41
  },
42
42
  )
@@ -6,6 +6,10 @@ from nltk.tokenize import sent_tokenize
6
6
  from nltk.corpus import stopwords
7
7
  from sklearn.metrics.pairwise import cosine_similarity
8
8
  import networkx as nx
9
+ import pkg_resources
10
+ import urllib.request
11
+ import zipfile
12
+ import shutil
9
13
 
10
14
  # Download necessary NLTK data
11
15
  # nltk.download('punkt_tab')
@@ -14,13 +18,58 @@ import networkx as nx
14
18
  class TextSummarizer:
15
19
  """A class for summarizing text documents using GloVe embeddings and PageRank."""
16
20
 
17
- def __init__(self, glove_path='glove.6B.100d.txt/glove.6B.100d.txt', num_sentences=5):
21
+ def __init__(self, glove_path=None, num_sentences=5):
22
+ if glove_path is None:
23
+ # Try to find GloVe file in package data first
24
+ try:
25
+ glove_path = pkg_resources.resource_filename('text_summarizer', 'glove.6B.100d.txt/glove.6B.100d.txt')
26
+ except (FileNotFoundError, ModuleNotFoundError):
27
+ # Fallback to default path
28
+ glove_path = 'glove.6B.100d.txt/glove.6B.100d.txt'
29
+
30
+ # Download GloVe if it doesn't exist
31
+ if not os.path.exists(glove_path):
32
+ print("GloVe embeddings not found. Downloading...")
33
+ self._download_glove()
34
+
18
35
  self.glove_path = glove_path
19
36
  self.num_sentences = num_sentences
20
37
  self.word_embeddings = {}
21
38
  self.stop_words = set(stopwords.words('english'))
22
39
  self._load_embeddings()
23
40
 
41
+ def _download_glove(self):
42
+ """Download and extract GloVe embeddings."""
43
+ glove_dir = 'glove.6B.100d.txt'
44
+ glove_file = os.path.join(glove_dir, 'glove.6B.100d.txt')
45
+ zip_url = 'http://nlp.stanford.edu/data/glove.6B.zip'
46
+ zip_path = 'glove.6B.zip'
47
+
48
+ try:
49
+ # Download the zip file
50
+ print(f"Downloading GloVe from {zip_url}...")
51
+ urllib.request.urlretrieve(zip_url, zip_path)
52
+
53
+ # Extract the specific file we need
54
+ print("Extracting GloVe embeddings...")
55
+ with zipfile.ZipFile(zip_path, 'r') as zip_ref:
56
+ # Create directory if it doesn't exist
57
+ os.makedirs(glove_dir, exist_ok=True)
58
+ # Extract only the 100d file
59
+ zip_ref.extract('glove.6B.100d.txt', '.')
60
+ # Move to the expected directory
61
+ if os.path.exists('glove.6B.100d.txt'):
62
+ shutil.move('glove.6B.100d.txt', glove_file)
63
+
64
+ # Clean up
65
+ os.remove(zip_path)
66
+ print("GloVe embeddings downloaded successfully!")
67
+
68
+ except Exception as e:
69
+ print(f"Failed to download GloVe embeddings: {e}")
70
+ print("Please download manually from: http://nlp.stanford.edu/data/glove.6B.zip")
71
+ raise
72
+
24
73
  def _load_embeddings(self):
25
74
  """Load GloVe word embeddings from file."""
26
75
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: text-summarizer-aweebtaku
3
- Version: 1.0.1
3
+ Version: 1.0.2
4
4
  Summary: A text summarization tool using GloVe embeddings and PageRank algorithm
5
5
  Home-page: https://github.com/AWeebTaku/Summarizer
6
6
  Author: Your Name
@@ -60,6 +60,8 @@ A Python-based text summarization tool that uses GloVe word embeddings and PageR
60
60
  pip install text-summarizer-aweebtaku
61
61
  ```
62
62
 
63
+ **Note:** This package includes the GloVe word embeddings file (~400MB), so the installation may take some time.
64
+
63
65
  ### Install from Source
64
66
 
65
67
  1. Clone the repository:
@@ -73,16 +75,7 @@ cd Summarizer
73
75
  pip install -e .
74
76
  ```
75
77
 
76
- ### Download GloVe Embeddings
77
-
78
- The tool requires GloVe word embeddings. Download the 100d version:
79
-
80
- ```bash
81
- wget http://nlp.stanford.edu/data/glove.6B.zip
82
- unzip glove.6B.zip
83
- ```
84
-
85
- Place the `glove.6B.100d.txt` file in the project root or specify the path.
78
+ **Note:** The GloVe word embeddings are included in the package, so no additional download is required.
86
79
 
87
80
  ## Usage
88
81