text-summarizer-aweebtaku 1.2.4__py3-none-any.whl → 1.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- text_summarizer/__init__.py +3 -3
- text_summarizer/cli.py +96 -88
- text_summarizer/create_shortcuts.py +63 -63
- text_summarizer/summarizer.py +322 -322
- text_summarizer/ui.py +379 -379
- {text_summarizer_aweebtaku-1.2.4.dist-info/licenses → text_summarizer_aweebtaku-1.2.6.dist-info}/LICENSE +20 -20
- {text_summarizer_aweebtaku-1.2.4.dist-info → text_summarizer_aweebtaku-1.2.6.dist-info}/METADATA +206 -207
- text_summarizer_aweebtaku-1.2.6.dist-info/RECORD +12 -0
- {text_summarizer_aweebtaku-1.2.4.dist-info → text_summarizer_aweebtaku-1.2.6.dist-info}/WHEEL +1 -1
- text_summarizer/data/tennis.csv +0 -9
- text_summarizer_aweebtaku-1.2.4.dist-info/RECORD +0 -13
- {text_summarizer_aweebtaku-1.2.4.dist-info → text_summarizer_aweebtaku-1.2.6.dist-info}/entry_points.txt +0 -0
- {text_summarizer_aweebtaku-1.2.4.dist-info → text_summarizer_aweebtaku-1.2.6.dist-info}/top_level.txt +0 -0
text_summarizer/ui.py
CHANGED
|
@@ -1,380 +1,380 @@
|
|
|
1
|
-
import tkinter as tk
|
|
2
|
-
from tkinter import ttk, filedialog, messagebox, scrolledtext
|
|
3
|
-
import pandas as pd
|
|
4
|
-
import threading
|
|
5
|
-
from .summarizer import TextSummarizer
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class TextSummarizerUI:
    """A GUI application for text summarization.

    Documents come from a pasted text, an uploaded CSV, or a CSV built in a
    dialog. Sentence scoring runs on a background thread; the result is picked
    up on the Tk thread by polling with ``after``, so the worker never touches
    a widget (tkinter widgets must only be used from the thread running the
    event loop).
    """

    # Milliseconds between main-thread checks on a running scoring thread.
    _POLL_INTERVAL_MS = 100

    def __init__(self, root):
        """Configure the main window, reset the state and build the widgets.

        Args:
            root: The Tk window that hosts the application.
        """
        self.root = root
        self.root.title("Text Summarizer")
        self.root.geometry("1000x700")
        self.root.resizable(True, True)
        self.root.configure(bg='#f0f0f0')

        # Configure styles
        style = ttk.Style()
        style.configure('TButton', font=('Arial', 10))
        style.configure('TLabel', font=('Arial', 10))
        style.configure('TFrame', background='#f0f0f0')

        self.summarizer = None
        self.df = None
        self.scored_sentences = None
        self.is_single = False
        # Worker results waiting to be collected on the Tk thread, keyed by
        # id() of the summarizer that produced them.
        self._processing_outcomes = {}

        self.create_widgets()

    def create_widgets(self):
        """Create and layout all UI widgets."""
        # Main frame
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.pack(fill=tk.BOTH, expand=True)

        # Title
        title_label = ttk.Label(main_frame, text="Text Summarizer", font=("Arial", 16, "bold"))
        title_label.pack(pady=10)

        # Data loading section
        load_frame = ttk.LabelFrame(main_frame, text="Load Data", padding="10")
        load_frame.pack(fill=tk.X, pady=10)

        ttk.Button(load_frame, text="Paste Single Document", command=self.paste_single).pack(side=tk.LEFT, padx=5)
        ttk.Button(load_frame, text="Upload CSV", command=self.upload_csv).pack(side=tk.LEFT, padx=5)
        ttk.Button(load_frame, text="Create CSV", command=self.create_csv).pack(side=tk.LEFT, padx=5)

        # Status
        self.status_label = ttk.Label(main_frame, text="Ready to load data")
        self.status_label.pack(pady=5)

        # Summarization section (its buttons depend on self.is_single)
        self.sum_frame = ttk.LabelFrame(main_frame, text="Summarization", padding="10")
        self.sum_frame.pack(fill=tk.BOTH, expand=True, pady=10)

        self.update_summarization_ui()

        # Results display
        results_frame = ttk.LabelFrame(main_frame, text="Results", padding="10")
        results_frame.pack(fill=tk.BOTH, expand=True, pady=10)

        # Original document display
        original_frame = ttk.Frame(results_frame)
        original_frame.pack(fill=tk.BOTH, expand=True, pady=5)
        label_frame = ttk.Frame(original_frame)
        label_frame.pack(fill=tk.X)
        ttk.Label(label_frame, text="Original Document:").pack(side=tk.LEFT)
        ttk.Button(label_frame, text="View Full", command=self.view_original).pack(side=tk.RIGHT)
        self.original_text = scrolledtext.ScrolledText(original_frame, wrap=tk.WORD, height=8)
        self.original_text.pack(fill=tk.BOTH, expand=True)
        self.original_text.config(state='disabled')

        # Summary display
        summary_frame = ttk.Frame(results_frame)
        summary_frame.pack(fill=tk.BOTH, expand=True, pady=5)
        label_frame2 = ttk.Frame(summary_frame)
        label_frame2.pack(fill=tk.X)
        ttk.Label(label_frame2, text="Summary:").pack(side=tk.LEFT)
        ttk.Button(label_frame2, text="View Full", command=self.view_summary).pack(side=tk.RIGHT)
        self.summary_text = scrolledtext.ScrolledText(summary_frame, wrap=tk.WORD, height=8)
        self.summary_text.pack(fill=tk.BOTH, expand=True)
        self.summary_text.config(state='disabled')

        # Bottom frame for Clear All and Save Summaries buttons
        bottom_frame = ttk.Frame(main_frame)
        bottom_frame.pack(side=tk.BOTTOM, fill=tk.X, pady=1)
        ttk.Button(bottom_frame, text="Save Summaries", command=self.save_summaries).pack(side=tk.RIGHT, padx=5)
        ttk.Button(bottom_frame, text="Clear All", command=self.clear_all).pack(side=tk.RIGHT, padx=5)

    def update_summarization_ui(self):
        """Rebuild the summarization controls for single or multi-document mode."""
        # Clear existing widgets in sum_frame
        for widget in self.sum_frame.winfo_children():
            widget.destroy()

        if self.is_single:
            ttk.Button(self.sum_frame, text="Summarize", command=self.summarize_single).pack(side=tk.LEFT, padx=5)
            return

        ttk.Button(self.sum_frame, text="Summarize Single Document", command=self.summarize_single).pack(side=tk.LEFT, padx=5)
        ttk.Button(self.sum_frame, text="Summarize All Documents", command=self.summarize_all).pack(side=tk.LEFT, padx=5)

        # Document ID input (only exists in multi-document mode)
        id_frame = ttk.Frame(self.sum_frame)
        id_frame.pack(side=tk.LEFT, padx=10)
        ttk.Label(id_frame, text="Document ID:").pack(side=tk.LEFT)
        self.article_id_entry = ttk.Entry(id_frame, width=10)
        self.article_id_entry.pack(side=tk.LEFT, padx=5)

    def save_summaries(self):
        """Save the generated summaries to a CSV file chosen by the user."""
        if self.scored_sentences is None or self.df is None:
            messagebox.showwarning("Warning", "No summaries to save. Please summarize first.")
            return
        if self.is_single:
            # A pasted document is always stored under ID 1 (see paste_single).
            article_id = 1
            article_text, summary = self.summarizer.summarize_article(self.scored_sentences, article_id, self.df)
            data = [{"article_id": article_id, "article_text": article_text, "summary": summary}]
        else:
            summaries = self.summarizer.summarize_all_articles(self.scored_sentences, self.df)
            data = [
                {"article_id": article_id, "article_text": d["article"], "summary": d["summary"]}
                for article_id, d in summaries.items()
            ]
        file_path = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV files", "*.csv")])
        if not file_path:
            return
        try:
            pd.DataFrame(data).to_csv(file_path, index=False)
        except Exception as e:
            messagebox.showerror("Error", f"Failed to save summaries: {str(e)}")
        else:
            messagebox.showinfo("Success", f"Summaries saved to {file_path}")

    def clear_all(self):
        """Clear all data and reset the UI."""
        self.is_single = False
        self.df = None
        self.scored_sentences = None
        # Dropping the summarizer also makes any in-flight scoring run stale;
        # _finish_processing discards its result.
        self.summarizer = None
        self.status_label.config(text="Ready to load data")
        self.update_summarization_ui()
        self._replace_text(self.original_text)
        self._replace_text(self.summary_text)

    def paste_single(self):
        """Open dialog to paste a single document."""
        dialog = PasteDialog(self.root)
        self.root.wait_window(dialog.top)
        if dialog.result:
            self.df = pd.DataFrame([{'article_id': 1, 'article_text': dialog.result}])
            self.is_single = True
            self.status_label.config(text="Single document loaded")
            self.update_summarization_ui()
            self.initialize_summarizer()

    def upload_csv(self):
        """Upload and load a CSV file with documents."""
        file_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
        if file_path:
            try:
                self.df = pd.read_csv(file_path)
                self.is_single = False
                self.status_label.config(text=f"CSV loaded from {file_path}")
                self.update_summarization_ui()
                self.initialize_summarizer()
            except Exception as e:
                messagebox.showerror("Error", f"Failed to load CSV: {str(e)}")

    def create_csv(self):
        """Open dialog to create a new CSV with multiple documents."""
        dialog = CreateCSVDialog(self.root)
        self.root.wait_window(dialog.top)
        if dialog.result:
            self.df = pd.DataFrame(dialog.result)
            self.is_single = False
            self.status_label.config(text="CSV created")
            self.update_summarization_ui()
            self.initialize_summarizer()

    def initialize_summarizer(self):
        """Create a fresh summarizer and score the loaded data in the background.

        Does nothing when no data is loaded. Scores from a previously loaded
        dataset are discarded immediately so they cannot be combined with the
        new documents while scoring is still running.
        """
        if self.df is None or self.df.empty:
            return
        summarizer = TextSummarizer()
        self.summarizer = summarizer
        self.scored_sentences = None
        self.status_label.config(text="Processing data...")
        # Daemon thread: a long scoring run must not keep the process alive
        # after the window has been closed.
        worker = threading.Thread(
            target=self.process_data, args=(summarizer, self.df), daemon=True
        )
        worker.start()
        self.root.after(self._POLL_INTERVAL_MS, self._finish_processing, worker, summarizer)

    def process_data(self, summarizer=None, df=None):
        """Compute sentence scores; safe to run on a worker thread.

        No widget is touched here. The outcome is parked in
        ``_processing_outcomes`` and applied by ``_finish_processing`` on the
        Tk thread.

        Args:
            summarizer: Summarizer to run; defaults to ``self.summarizer``.
            df: Data to score; defaults to ``self.df``.
        """
        if summarizer is None:
            summarizer = self.summarizer
        if df is None:
            df = self.df
        try:
            outcome = (summarizer.run_summarization(df), None)
        except Exception as exc:
            outcome = (None, exc)
        self._processing_outcomes[id(summarizer)] = outcome

    def _finish_processing(self, worker, summarizer):
        """Apply a finished scoring run to the UI; runs on the Tk thread.

        Re-schedules itself while ``worker`` is still alive. A run whose
        summarizer is no longer the current one (new data loaded or state
        cleared in the meantime) is dropped silently.
        """
        if worker.is_alive():
            self.root.after(self._POLL_INTERVAL_MS, self._finish_processing, worker, summarizer)
            return
        outcome = self._processing_outcomes.pop(id(summarizer), None)
        if summarizer is not self.summarizer:
            return
        if outcome is None:
            # The worker ended without reporting anything.
            self.status_label.config(text="Processing failed")
            return
        scored, error = outcome
        if error is not None:
            self.status_label.config(text="Processing failed")
            messagebox.showerror("Error", f"Processing failed: {str(error)}")
            return
        self.scored_sentences = scored
        self.status_label.config(text="Data processed successfully")

    def summarize_single(self):
        """Summarize one document and show it in the result panes."""
        if self.scored_sentences is None:
            messagebox.showwarning("Warning", "Please load and process data first")
            return
        if self.is_single:
            article_id = 1
        else:
            try:
                article_id = int(self.article_id_entry.get())
            except ValueError:
                messagebox.showerror("Error", "Invalid Document ID")
                return
        article_text, summary = self.summarizer.summarize_article(self.scored_sentences, article_id, self.df)
        if article_text and summary:
            self.display_result(article_text, summary)
        else:
            messagebox.showerror("Error", f"Document ID {article_id} not found")

    def summarize_all(self):
        """Summarize all documents and display in text areas."""
        if self.scored_sentences is None:
            messagebox.showwarning("Warning", "Please load and process data first")
            return
        summaries = self.summarizer.summarize_all_articles(self.scored_sentences, self.df)
        originals = "".join(
            f"Document ID: {article_id}\n{data['article']}\n\n"
            for article_id, data in summaries.items()
        )
        digests = "".join(
            f"Document ID: {article_id}\n{data['summary']}\n\n"
            for article_id, data in summaries.items()
        )
        self.display_result(originals, digests)

    def display_result(self, article, summary):
        """Display the article and summary in the text areas."""
        self._replace_text(self.original_text, article)
        self._replace_text(self.summary_text, summary)

    @staticmethod
    def _replace_text(widget, content=""):
        """Replace the content of a read-only text widget with ``content``."""
        # The panes are kept disabled so the user cannot edit them; they have
        # to be enabled briefly for programmatic changes.
        widget.config(state='normal')
        widget.delete(1.0, tk.END)
        widget.insert(tk.END, content)
        widget.config(state='disabled')

    def _open_full_view(self, title, source_widget):
        """Show the text of ``source_widget`` in a separate read-only window."""
        top = tk.Toplevel(self.root)
        top.title(title)
        top.geometry("900x700")
        top.resizable(True, True)
        text = scrolledtext.ScrolledText(top, wrap=tk.WORD)
        text.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
        text.insert(tk.END, source_widget.get(1.0, tk.END))
        text.config(state=tk.DISABLED)  # Make it read-only

    def view_original(self):
        """Open a full view window for the original document."""
        self._open_full_view("Original Document - Full View", self.original_text)

    def view_summary(self):
        """Open a full view window for the summary."""
        self._open_full_view("Summary - Full View", self.summary_text)
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
class PasteDialog:
    """Modal window in which the user pastes the text of one document.

    Once the window is closed, ``result`` is the stripped text if OK was
    pressed, otherwise ``None``.
    """

    def __init__(self, parent):
        """Build the dialog on top of ``parent`` and grab input focus."""
        self.result = None

        window = tk.Toplevel(parent)
        self.top = window
        window.title("Paste Document")
        window.geometry("700x600")
        window.transient(parent)
        window.grab_set()

        ttk.Label(window, text="Paste your document:").pack(pady=5)
        editor = scrolledtext.ScrolledText(window, wrap=tk.WORD)
        editor.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        self.text = editor

        buttons = ttk.Frame(window)
        buttons.pack(side=tk.BOTTOM, fill=tk.X, pady=5)
        # Packed right-to-left, so OK ends up rightmost.
        for caption, handler in (("OK", self.ok), ("Cancel", self.cancel)):
            ttk.Button(buttons, text=caption, command=handler).pack(side=tk.RIGHT, padx=5)

    def ok(self):
        """Store the stripped editor content in ``result`` and close."""
        pasted = self.text.get(1.0, tk.END)
        self.result = pasted.strip()
        self.top.destroy()

    def cancel(self):
        """Close the dialog without changing ``result``."""
        self.top.destroy()
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
class CreateCSVDialog:
    """Dialog for creating a CSV with multiple documents.

    After the window closes, ``result`` holds the entered documents as a list
    of ``{'article_id': int, 'article_text': str}`` dicts. It stays empty when
    the dialog is cancelled or no document was added.
    """

    def __init__(self, parent):
        """Build the dialog on top of ``parent`` and grab input focus."""
        self.top = tk.Toplevel(parent)
        self.top.title("Create CSV")
        self.top.geometry("700x600")
        self.top.transient(parent)
        self.top.grab_set()
        self.result = []

        self.articles = []
        # Next document ID offered in the ID entry.
        self.counter = 1

        ttk.Label(self.top, text="Enter documents (ID and text):").pack(pady=5)

        input_frame = ttk.Frame(self.top)
        input_frame.pack(fill=tk.X, padx=10, pady=5)

        ttk.Label(input_frame, text="Document ID:").grid(row=0, column=0, sticky=tk.W)
        self.id_entry = ttk.Entry(input_frame, width=10)
        self.id_entry.grid(row=0, column=1, padx=5)
        # Offer the first ID right away, as add_article does for later ones.
        self.id_entry.insert(0, str(self.counter))

        ttk.Label(input_frame, text="Document Text:").grid(row=1, column=0, sticky=tk.W)
        self.text_entry = scrolledtext.ScrolledText(input_frame, wrap=tk.WORD, height=5)
        self.text_entry.grid(row=1, column=1, padx=5, pady=5)

        button_frame = ttk.Frame(self.top)
        button_frame.pack(fill=tk.X, padx=10, pady=5)

        ttk.Button(button_frame, text="Add Document", command=self.add_article).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="Done", command=self.done).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="Cancel", command=self.cancel).pack(side=tk.RIGHT, padx=5)

        self.listbox = tk.Listbox(self.top, height=10)
        self.listbox.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

    def add_article(self):
        """Validate the current inputs and append them to the document list.

        Shows an error for a non-integer ID and a warning for empty text; in
        both cases nothing is added and the inputs are left untouched.
        """
        try:
            article_id = int(self.id_entry.get())
        except ValueError:
            messagebox.showerror("Error", "Invalid Document ID")
            return
        article_text = self.text_entry.get(1.0, tk.END).strip()
        if not article_text:
            messagebox.showwarning("Warning", "Document cannot be empty")
            return

        self.articles.append({'article_id': article_id, 'article_text': article_text})
        self.listbox.insert(tk.END, f"ID: {article_id} - {article_text[:50]}...")
        # Suggest an ID above everything entered so far, so that a manually
        # typed ID does not make the next suggestion collide with it.
        self.counter = max(self.counter, article_id + 1)
        self.id_entry.delete(0, tk.END)
        self.text_entry.delete(1.0, tk.END)
        self.id_entry.insert(0, str(self.counter))

    def done(self):
        """Optionally save the documents as CSV, publish them and close.

        ``result`` is always the in-memory list. The saved file is not read
        back, because a CSV round trip can alter text values (for example a
        document consisting of "NA" would come back as a missing value).
        """
        if self.articles:
            if messagebox.askyesno("Save CSV", "Do you want to save the CSV file?"):
                file_path = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV files", "*.csv")])
                if file_path:
                    try:
                        pd.DataFrame(self.articles).to_csv(file_path, index=False)
                    except Exception as e:
                        # A failed save must not lose the entered documents
                        # or leave the dialog stuck open.
                        messagebox.showerror("Error", f"Failed to save CSV: {str(e)}")
            self.result = self.articles
        self.top.destroy()

    def cancel(self):
        """Close the dialog and leave ``result`` empty."""
        self.top.destroy()
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
def main():
    """Create the Tk root window, attach the summarizer UI and run the event loop."""
    window = tk.Tk()
    app = TextSummarizerUI(window)
    window.mainloop()
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
if __name__ == "__main__":
|
|
1
|
+
import tkinter as tk
|
|
2
|
+
from tkinter import ttk, filedialog, messagebox, scrolledtext
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import threading
|
|
5
|
+
from .summarizer import TextSummarizer
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TextSummarizerUI:
    """A GUI application for text summarization.

    Documents come from a pasted text, an uploaded CSV, or a CSV built in a
    dialog. Sentence scoring runs on a background thread; the result is picked
    up on the Tk thread by polling with ``after``, so the worker never touches
    a widget (tkinter widgets must only be used from the thread running the
    event loop).
    """

    # Milliseconds between main-thread checks on a running scoring thread.
    _POLL_INTERVAL_MS = 100

    def __init__(self, root):
        """Configure the main window, reset the state and build the widgets.

        Args:
            root: The Tk window that hosts the application.
        """
        self.root = root
        self.root.title("Text Summarizer")
        self.root.geometry("1000x700")
        self.root.resizable(True, True)
        self.root.configure(bg='#f0f0f0')

        # Configure styles
        style = ttk.Style()
        style.configure('TButton', font=('Arial', 10))
        style.configure('TLabel', font=('Arial', 10))
        style.configure('TFrame', background='#f0f0f0')

        self.summarizer = None
        self.df = None
        self.scored_sentences = None
        self.is_single = False
        # Worker results waiting to be collected on the Tk thread, keyed by
        # id() of the summarizer that produced them.
        self._processing_outcomes = {}

        self.create_widgets()

    def create_widgets(self):
        """Create and layout all UI widgets."""
        # Main frame
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.pack(fill=tk.BOTH, expand=True)

        # Title
        title_label = ttk.Label(main_frame, text="Text Summarizer", font=("Arial", 16, "bold"))
        title_label.pack(pady=10)

        # Data loading section
        load_frame = ttk.LabelFrame(main_frame, text="Load Data", padding="10")
        load_frame.pack(fill=tk.X, pady=10)

        ttk.Button(load_frame, text="Paste Single Document", command=self.paste_single).pack(side=tk.LEFT, padx=5)
        ttk.Button(load_frame, text="Upload CSV", command=self.upload_csv).pack(side=tk.LEFT, padx=5)
        ttk.Button(load_frame, text="Create CSV", command=self.create_csv).pack(side=tk.LEFT, padx=5)

        # Status
        self.status_label = ttk.Label(main_frame, text="Ready to load data")
        self.status_label.pack(pady=5)

        # Summarization section (its buttons depend on self.is_single)
        self.sum_frame = ttk.LabelFrame(main_frame, text="Summarization", padding="10")
        self.sum_frame.pack(fill=tk.BOTH, expand=True, pady=10)

        self.update_summarization_ui()

        # Results display
        results_frame = ttk.LabelFrame(main_frame, text="Results", padding="10")
        results_frame.pack(fill=tk.BOTH, expand=True, pady=10)

        # Original document display
        original_frame = ttk.Frame(results_frame)
        original_frame.pack(fill=tk.BOTH, expand=True, pady=5)
        label_frame = ttk.Frame(original_frame)
        label_frame.pack(fill=tk.X)
        ttk.Label(label_frame, text="Original Document:").pack(side=tk.LEFT)
        ttk.Button(label_frame, text="View Full", command=self.view_original).pack(side=tk.RIGHT)
        self.original_text = scrolledtext.ScrolledText(original_frame, wrap=tk.WORD, height=8)
        self.original_text.pack(fill=tk.BOTH, expand=True)
        self.original_text.config(state='disabled')

        # Summary display
        summary_frame = ttk.Frame(results_frame)
        summary_frame.pack(fill=tk.BOTH, expand=True, pady=5)
        label_frame2 = ttk.Frame(summary_frame)
        label_frame2.pack(fill=tk.X)
        ttk.Label(label_frame2, text="Summary:").pack(side=tk.LEFT)
        ttk.Button(label_frame2, text="View Full", command=self.view_summary).pack(side=tk.RIGHT)
        self.summary_text = scrolledtext.ScrolledText(summary_frame, wrap=tk.WORD, height=8)
        self.summary_text.pack(fill=tk.BOTH, expand=True)
        self.summary_text.config(state='disabled')

        # Bottom frame for Clear All and Save Summaries buttons
        bottom_frame = ttk.Frame(main_frame)
        bottom_frame.pack(side=tk.BOTTOM, fill=tk.X, pady=1)
        ttk.Button(bottom_frame, text="Save Summaries", command=self.save_summaries).pack(side=tk.RIGHT, padx=5)
        ttk.Button(bottom_frame, text="Clear All", command=self.clear_all).pack(side=tk.RIGHT, padx=5)

    def update_summarization_ui(self):
        """Rebuild the summarization controls for single or multi-document mode."""
        # Clear existing widgets in sum_frame
        for widget in self.sum_frame.winfo_children():
            widget.destroy()

        if self.is_single:
            ttk.Button(self.sum_frame, text="Summarize", command=self.summarize_single).pack(side=tk.LEFT, padx=5)
            return

        ttk.Button(self.sum_frame, text="Summarize Single Document", command=self.summarize_single).pack(side=tk.LEFT, padx=5)
        ttk.Button(self.sum_frame, text="Summarize All Documents", command=self.summarize_all).pack(side=tk.LEFT, padx=5)

        # Document ID input (only exists in multi-document mode)
        id_frame = ttk.Frame(self.sum_frame)
        id_frame.pack(side=tk.LEFT, padx=10)
        ttk.Label(id_frame, text="Document ID:").pack(side=tk.LEFT)
        self.article_id_entry = ttk.Entry(id_frame, width=10)
        self.article_id_entry.pack(side=tk.LEFT, padx=5)

    def save_summaries(self):
        """Save the generated summaries to a CSV file chosen by the user."""
        if self.scored_sentences is None or self.df is None:
            messagebox.showwarning("Warning", "No summaries to save. Please summarize first.")
            return
        if self.is_single:
            # A pasted document is always stored under ID 1 (see paste_single).
            article_id = 1
            article_text, summary = self.summarizer.summarize_article(self.scored_sentences, article_id, self.df)
            data = [{"article_id": article_id, "article_text": article_text, "summary": summary}]
        else:
            summaries = self.summarizer.summarize_all_articles(self.scored_sentences, self.df)
            data = [
                {"article_id": article_id, "article_text": d["article"], "summary": d["summary"]}
                for article_id, d in summaries.items()
            ]
        file_path = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV files", "*.csv")])
        if not file_path:
            return
        try:
            pd.DataFrame(data).to_csv(file_path, index=False)
        except Exception as e:
            messagebox.showerror("Error", f"Failed to save summaries: {str(e)}")
        else:
            messagebox.showinfo("Success", f"Summaries saved to {file_path}")

    def clear_all(self):
        """Clear all data and reset the UI."""
        self.is_single = False
        self.df = None
        self.scored_sentences = None
        # Dropping the summarizer also makes any in-flight scoring run stale;
        # _finish_processing discards its result.
        self.summarizer = None
        self.status_label.config(text="Ready to load data")
        self.update_summarization_ui()
        self._replace_text(self.original_text)
        self._replace_text(self.summary_text)

    def paste_single(self):
        """Open dialog to paste a single document."""
        dialog = PasteDialog(self.root)
        self.root.wait_window(dialog.top)
        if dialog.result:
            self.df = pd.DataFrame([{'article_id': 1, 'article_text': dialog.result}])
            self.is_single = True
            self.status_label.config(text="Single document loaded")
            self.update_summarization_ui()
            self.initialize_summarizer()

    def upload_csv(self):
        """Upload and load a CSV file with documents."""
        file_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
        if file_path:
            try:
                self.df = pd.read_csv(file_path)
                self.is_single = False
                self.status_label.config(text=f"CSV loaded from {file_path}")
                self.update_summarization_ui()
                self.initialize_summarizer()
            except Exception as e:
                messagebox.showerror("Error", f"Failed to load CSV: {str(e)}")

    def create_csv(self):
        """Open dialog to create a new CSV with multiple documents."""
        dialog = CreateCSVDialog(self.root)
        self.root.wait_window(dialog.top)
        if dialog.result:
            self.df = pd.DataFrame(dialog.result)
            self.is_single = False
            self.status_label.config(text="CSV created")
            self.update_summarization_ui()
            self.initialize_summarizer()

    def initialize_summarizer(self):
        """Create a fresh summarizer and score the loaded data in the background.

        Does nothing when no data is loaded. Scores from a previously loaded
        dataset are discarded immediately so they cannot be combined with the
        new documents while scoring is still running.
        """
        if self.df is None or self.df.empty:
            return
        summarizer = TextSummarizer()
        self.summarizer = summarizer
        self.scored_sentences = None
        self.status_label.config(text="Processing data...")
        # Daemon thread: a long scoring run must not keep the process alive
        # after the window has been closed.
        worker = threading.Thread(
            target=self.process_data, args=(summarizer, self.df), daemon=True
        )
        worker.start()
        self.root.after(self._POLL_INTERVAL_MS, self._finish_processing, worker, summarizer)

    def process_data(self, summarizer=None, df=None):
        """Compute sentence scores; safe to run on a worker thread.

        No widget is touched here. The outcome is parked in
        ``_processing_outcomes`` and applied by ``_finish_processing`` on the
        Tk thread.

        Args:
            summarizer: Summarizer to run; defaults to ``self.summarizer``.
            df: Data to score; defaults to ``self.df``.
        """
        if summarizer is None:
            summarizer = self.summarizer
        if df is None:
            df = self.df
        try:
            outcome = (summarizer.run_summarization(df), None)
        except Exception as exc:
            outcome = (None, exc)
        self._processing_outcomes[id(summarizer)] = outcome

    def _finish_processing(self, worker, summarizer):
        """Apply a finished scoring run to the UI; runs on the Tk thread.

        Re-schedules itself while ``worker`` is still alive. A run whose
        summarizer is no longer the current one (new data loaded or state
        cleared in the meantime) is dropped silently.
        """
        if worker.is_alive():
            self.root.after(self._POLL_INTERVAL_MS, self._finish_processing, worker, summarizer)
            return
        outcome = self._processing_outcomes.pop(id(summarizer), None)
        if summarizer is not self.summarizer:
            return
        if outcome is None:
            # The worker ended without reporting anything.
            self.status_label.config(text="Processing failed")
            return
        scored, error = outcome
        if error is not None:
            self.status_label.config(text="Processing failed")
            messagebox.showerror("Error", f"Processing failed: {str(error)}")
            return
        self.scored_sentences = scored
        self.status_label.config(text="Data processed successfully")

    def summarize_single(self):
        """Summarize one document and show it in the result panes."""
        if self.scored_sentences is None:
            messagebox.showwarning("Warning", "Please load and process data first")
            return
        if self.is_single:
            article_id = 1
        else:
            try:
                article_id = int(self.article_id_entry.get())
            except ValueError:
                messagebox.showerror("Error", "Invalid Document ID")
                return
        article_text, summary = self.summarizer.summarize_article(self.scored_sentences, article_id, self.df)
        if article_text and summary:
            self.display_result(article_text, summary)
        else:
            messagebox.showerror("Error", f"Document ID {article_id} not found")

    def summarize_all(self):
        """Summarize all documents and display in text areas."""
        if self.scored_sentences is None:
            messagebox.showwarning("Warning", "Please load and process data first")
            return
        summaries = self.summarizer.summarize_all_articles(self.scored_sentences, self.df)
        originals = "".join(
            f"Document ID: {article_id}\n{data['article']}\n\n"
            for article_id, data in summaries.items()
        )
        digests = "".join(
            f"Document ID: {article_id}\n{data['summary']}\n\n"
            for article_id, data in summaries.items()
        )
        self.display_result(originals, digests)

    def display_result(self, article, summary):
        """Display the article and summary in the text areas."""
        self._replace_text(self.original_text, article)
        self._replace_text(self.summary_text, summary)

    @staticmethod
    def _replace_text(widget, content=""):
        """Replace the content of a read-only text widget with ``content``."""
        # The panes are kept disabled so the user cannot edit them; they have
        # to be enabled briefly for programmatic changes.
        widget.config(state='normal')
        widget.delete(1.0, tk.END)
        widget.insert(tk.END, content)
        widget.config(state='disabled')

    def _open_full_view(self, title, source_widget):
        """Show the text of ``source_widget`` in a separate read-only window."""
        top = tk.Toplevel(self.root)
        top.title(title)
        top.geometry("900x700")
        top.resizable(True, True)
        text = scrolledtext.ScrolledText(top, wrap=tk.WORD)
        text.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
        text.insert(tk.END, source_widget.get(1.0, tk.END))
        text.config(state=tk.DISABLED)  # Make it read-only

    def view_original(self):
        """Open a full view window for the original document."""
        self._open_full_view("Original Document - Full View", self.original_text)

    def view_summary(self):
        """Open a full view window for the summary."""
        self._open_full_view("Summary - Full View", self.summary_text)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class PasteDialog:
    """Modal window in which the user pastes the text of a single document.

    Attributes:
        top: The ``Toplevel`` window hosting the dialog.
        text: Scrollable text area that receives the pasted document.
        result: Stripped contents of ``text`` once OK has been pressed;
            ``None`` if the dialog is closed without pressing OK.
    """

    def __init__(self, parent):
        """Build the dialog as a transient child of ``parent`` and grab input."""
        window = tk.Toplevel(parent)
        window.title("Paste Document")
        window.geometry("700x600")
        window.transient(parent)
        window.grab_set()
        self.top = window
        self.result = None

        prompt = ttk.Label(window, text="Paste your document:")
        prompt.pack(pady=5)

        self.text = scrolledtext.ScrolledText(window, wrap=tk.WORD)
        self.text.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

        buttons = ttk.Frame(window)
        buttons.pack(side=tk.BOTTOM, fill=tk.X, pady=5)
        # Both buttons are packed to the right, so the one packed first (OK)
        # ends up as the right-most button.
        for caption, handler in (("OK", self.ok), ("Cancel", self.cancel)):
            ttk.Button(buttons, text=caption, command=handler).pack(side=tk.RIGHT, padx=5)

    def ok(self):
        """Store the pasted text, stripped of surrounding whitespace, and close."""
        pasted = self.text.get(1.0, tk.END)
        self.result = pasted.strip()
        self.top.destroy()

    def cancel(self):
        """Close the dialog without touching ``result``."""
        self.top.destroy()
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
class CreateCSVDialog:
    """Modal dialog for entering several documents and optionally saving them as CSV.

    Attributes:
        top: The ``Toplevel`` window hosting the dialog.
        articles: Documents added so far, each a dict with the keys
            ``'article_id'`` (int) and ``'article_text'`` (str).
        counter: Number used for the next suggested document ID.
        result: The collected documents once "Done" has been pressed with at
            least one document present; an empty list otherwise (including
            after "Cancel").
    """

    # Maximum number of characters of a document shown in the listbox.
    _PREVIEW_CHARS = 50

    def __init__(self, parent):
        """Build the dialog as a transient child of ``parent`` and grab input."""
        self.top = tk.Toplevel(parent)
        self.top.title("Create CSV")
        self.top.geometry("700x600")
        self.top.transient(parent)
        self.top.grab_set()
        self.result = []

        self.articles = []
        self.counter = 1

        ttk.Label(self.top, text="Enter documents (ID and text):").pack(pady=5)

        input_frame = ttk.Frame(self.top)
        input_frame.pack(fill=tk.X, padx=10, pady=5)

        ttk.Label(input_frame, text="Document ID:").grid(row=0, column=0, sticky=tk.W)
        self.id_entry = ttk.Entry(input_frame, width=10)
        self.id_entry.grid(row=0, column=1, padx=5)
        # Suggest the first ID right away; later suggestions are written by
        # add_article() after each successful addition.
        self.id_entry.insert(0, str(self.counter))

        ttk.Label(input_frame, text="Document Text:").grid(row=1, column=0, sticky=tk.W)
        self.text_entry = scrolledtext.ScrolledText(input_frame, wrap=tk.WORD, height=5)
        self.text_entry.grid(row=1, column=1, padx=5, pady=5)

        button_frame = ttk.Frame(self.top)
        button_frame.pack(fill=tk.X, padx=10, pady=5)

        ttk.Button(button_frame, text="Add Document", command=self.add_article).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="Done", command=self.done).pack(side=tk.LEFT, padx=5)
        ttk.Button(button_frame, text="Cancel", command=self.cancel).pack(side=tk.RIGHT, padx=5)

        self.listbox = tk.Listbox(self.top, height=10)
        self.listbox.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

    @classmethod
    def _preview(cls, text):
        """Return ``text`` cut to the preview length, adding "..." only if cut."""
        if len(text) <= cls._PREVIEW_CHARS:
            return text
        return f"{text[:cls._PREVIEW_CHARS]}..."

    def add_article(self):
        """Validate the current input and append it to the document list.

        Shows an error box if the ID is not an integer and a warning box if
        the text is empty; in both cases nothing is added and the input
        fields are left untouched.
        """
        try:
            article_id = int(self.id_entry.get())
        except ValueError:
            messagebox.showerror("Error", "Invalid Document ID")
            return

        article_text = self.text_entry.get(1.0, tk.END).strip()
        if not article_text:
            messagebox.showwarning("Warning", "Document cannot be empty")
            return

        self.articles.append({'article_id': article_id, 'article_text': article_text})
        self.listbox.insert(tk.END, f"ID: {article_id} - {self._preview(article_text)}")

        # Reset the form and suggest the next ID.
        self.id_entry.delete(0, tk.END)
        self.text_entry.delete(1.0, tk.END)
        self.counter += 1
        self.id_entry.insert(0, str(self.counter))

    def _save_csv(self, file_path):
        """Write the collected documents to ``file_path``.

        Returns:
            True if the file was written, False if writing failed (the user
            is told why through an error box).
        """
        try:
            pd.DataFrame(self.articles).to_csv(file_path, index=False)
        except OSError as exc:
            messagebox.showerror("Error", f"Could not save CSV file:\n{exc}")
            return False
        return True

    def done(self):
        """Finish the dialog, offering to save the documents as a CSV file.

        With no documents the dialog simply closes and ``result`` stays
        empty. Otherwise the user may save the documents; ``result`` is then
        set to the in-memory documents and the dialog closes. If writing the
        file fails the dialog stays open so the user can retry or decline
        saving.
        """
        if not self.articles:
            self.top.destroy()
            return

        if messagebox.askyesno("Save CSV", "Do you want to save the CSV file?"):
            file_path = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV files", "*.csv")])
            # An empty path means the file chooser was dismissed: continue
            # without saving.
            if file_path and not self._save_csv(file_path):
                return

        # Hand back the documents exactly as entered. Re-reading the file
        # just written would be lossy: pandas parses texts such as "NA" or
        # "null" as missing values.
        self.result = list(self.articles)
        self.top.destroy()

    def cancel(self):
        """Close the dialog, leaving ``result`` as an empty list."""
        self.top.destroy()
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def main():
    """Create the Tk root window, attach the summarizer UI and run the event loop."""
    window = tk.Tk()
    ui = TextSummarizerUI(window)  # bound to a name so the UI object stays referenced
    window.mainloop()
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
# Start the GUI only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
|