thinkpdf 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pdfbrain/__init__.py ADDED
@@ -0,0 +1,22 @@
1
"""
thinkpdf - The Ultimate PDF to Markdown Converter

Combines the best of pdfmd and Marker with exclusive features:
- Modern GUI with CustomTkinter
- CLI tool for automation
- MCP Server for IDE integration
- Intelligent caching
- Optional LLM validation
"""

# NOTE(review): the distribution is named "thinkpdf 1.0.1" but this constant
# still reads 1.0.0 - confirm whether it should be bumped with each release.
__version__ = "1.0.0"
__author__ = "thinkpdf Team"

# Re-export the two core classes so they can be imported from the package root.
from .core.extractor import PDFExtractor
from .core.converter import PDFConverter

# Names exposed by `from <package> import *`.
__all__ = [
    "PDFExtractor",
    "PDFConverter",
    "__version__",
]
pdfbrain/app_gui.py ADDED
@@ -0,0 +1,530 @@
1
+ """
2
+ thinkpdf GUI - Modern desktop application for PDF to Markdown conversion.
3
+
4
+ Features:
5
+ - Modern dark/light theme with CustomTkinter
6
+ - Drag and drop support
7
+ - Progress tracking
8
+ - Quality selection
9
+ - Batch processing
10
+ - Preview panel
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ import sys
17
+ import threading
18
+ from pathlib import Path
19
+ from typing import Optional, List
20
+ from tkinter import filedialog, messagebox
21
+ import tkinter as tk
22
+
23
+ try:
24
+ import customtkinter as ctk
25
+ HAS_CTK = True
26
+ except ImportError:
27
+ HAS_CTK = False
28
+ ctk = None
29
+
30
+ try:
31
+ from PIL import Image
32
+ HAS_PIL = True
33
+ except ImportError:
34
+ HAS_PIL = False
35
+
36
+ # Version
37
+ __version__ = "1.0.0"
38
+
39
+ from .core.converter import PDFConverter, ConversionOptions, ConversionResult
40
+ from .cache.cache_manager import CacheManager
41
+
42
+
43
+ # Color schemes
44
# Per-theme palette: theme name -> role -> hex colour.
# Both themes share the accent/success/error colours and differ only in the
# background, foreground, secondary and border shades.
COLORS = {
    "dark": dict(
        bg="#1a1a2e",
        fg="#eaeaea",
        accent="#7c3aed",
        accent_hover="#8b5cf6",
        secondary="#2d2d44",
        success="#10b981",
        error="#ef4444",
        border="#3d3d5c",
    ),
    "light": dict(
        bg="#f8fafc",
        fg="#1e293b",
        accent="#7c3aed",
        accent_hover="#8b5cf6",
        secondary="#e2e8f0",
        success="#10b981",
        error="#ef4444",
        border="#cbd5e1",
    ),
}
66
+
67
+
68
class thinkpdfApp:
    """Main application window.

    Builds a CustomTkinter window in which the user picks PDF files, chooses
    conversion options and converts every file to a Markdown file written
    next to the source PDF.  Conversion runs on a background thread; all
    widget updates are marshalled back to the Tk main loop with
    ``root.after``.

    When CustomTkinter is not installed the constructor shows a plain Tk
    window with installation instructions instead, and ``run()`` is a no-op.
    """

    # Maximum number of per-file error lines shown in the completion dialog.
    _MAX_FAILURES_SHOWN = 5

    def __init__(self):
        """Create the window, initialise state and build all widgets."""
        if not HAS_CTK:
            # Blocks in the fallback window's main loop; none of the
            # attributes below exist afterwards, which run() accounts for.
            self._run_fallback()
            return

        # Configure CustomTkinter
        ctk.set_appearance_mode("dark")
        ctk.set_default_color_theme("blue")

        # Create window
        self.root = ctk.CTk()
        self.root.title("thinkpdf - PDF to Markdown Converter")
        self.root.geometry("900x650")
        self.root.minsize(800, 600)

        # State
        self.input_files: List[Path] = []
        self.is_converting = False
        self.cache = CacheManager()

        # Build UI
        self._build_ui()

        # Drag and drop hook (currently a placeholder)
        self._setup_dnd()

    def _run_fallback(self):
        """Show a plain Tk window explaining that CustomTkinter is required."""
        root = tk.Tk()
        root.title("thinkpdf")
        root.geometry("500x300")

        label = tk.Label(
            root,
            text="thinkpdf requires CustomTkinter for the GUI.\n\n"
            "Install it with:\n"
            "pip install customtkinter\n\n"
            "Or use the CLI:\n"
            "thinkpdf input.pdf",
            font=("Arial", 12),
            padx=20,
            pady=20,
        )
        label.pack(expand=True)

        root.mainloop()

    def _build_ui(self):
        """Build the main container and every section, top to bottom."""
        main_frame = ctk.CTkFrame(self.root)
        main_frame.pack(fill="both", expand=True, padx=20, pady=20)

        # Pack order determines the vertical layout of the window.
        self._build_header(main_frame)
        self._build_drop_zone(main_frame)
        self._build_options(main_frame)
        self._build_file_list(main_frame)
        self._build_progress(main_frame)
        self._build_buttons(main_frame)
        self._build_status(main_frame)

    def _build_header(self, parent):
        """Build the header: optional logo, title and the theme switch."""
        header = ctk.CTkFrame(parent, fg_color="transparent")
        header.pack(fill="x", pady=(0, 15))

        # Logo + Title
        if HAS_PIL and HAS_CTK:
            logo_path = Path(__file__).parent.parent / "logo.png"
            if logo_path.exists():
                try:
                    # Open the file once and reuse it for both appearances.
                    logo = Image.open(logo_path)
                    logo_img = ctk.CTkImage(
                        light_image=logo,
                        dark_image=logo,
                        size=(40, 40),
                    )
                    logo_label = ctk.CTkLabel(header, image=logo_img, text="")
                    logo_label.pack(side="left", padx=(0, 10))
                except Exception:
                    # The logo is purely decorative: an unreadable or corrupt
                    # image must never prevent the window from opening.
                    pass

        # Title
        title = ctk.CTkLabel(
            header,
            text="thinkpdf",
            font=ctk.CTkFont(size=28, weight="bold"),
        )
        title.pack(side="left")

        # Theme toggle
        self.theme_var = ctk.StringVar(value="dark")
        theme_btn = ctk.CTkSegmentedButton(
            header,
            values=["☀️ Light", "🌙 Dark"],
            command=self._toggle_theme,
            width=150,
        )
        theme_btn.set("🌙 Dark")
        theme_btn.pack(side="right")

    def _build_drop_zone(self, parent):
        """Build the framed area that opens the file browser when clicked."""
        self.drop_frame = ctk.CTkFrame(
            parent,
            height=120,
            corner_radius=15,
            border_width=2,
            border_color=COLORS["dark"]["accent"],
        )
        self.drop_frame.pack(fill="x", pady=(0, 15))
        # Keep the fixed height instead of shrinking to fit the label.
        self.drop_frame.pack_propagate(False)

        drop_label = ctk.CTkLabel(
            self.drop_frame,
            text="📁 Drag & Drop PDF files here\nor click to browse",
            font=ctk.CTkFont(size=16),
        )
        drop_label.pack(expand=True)

        # Make clickable: bind both the frame and the label covering it.
        self.drop_frame.bind("<Button-1>", self._browse_files)
        drop_label.bind("<Button-1>", self._browse_files)

    def _build_options(self, parent):
        """Build the quality selector and the option checkboxes."""
        options_frame = ctk.CTkFrame(parent, fg_color="transparent")
        options_frame.pack(fill="x", pady=(0, 15))

        # Quality selector
        quality_label = ctk.CTkLabel(options_frame, text="Quality:")
        quality_label.pack(side="left", padx=(0, 10))

        # Holds the machine-readable quality name ("fast"/"balanced"/"maximum");
        # the segmented button shows the decorated labels.
        self.quality_var = ctk.StringVar(value="balanced")
        quality_menu = ctk.CTkSegmentedButton(
            options_frame,
            values=["⚡ Fast", "⚖️ Balanced", "🎯 Maximum"],
            command=self._on_quality_change,
            width=300,
        )
        quality_menu.set("⚖️ Balanced")
        quality_menu.pack(side="left", padx=(0, 20))

        # Options checkboxes
        self.use_cache_var = ctk.BooleanVar(value=True)
        cache_check = ctk.CTkCheckBox(
            options_frame,
            text="Use cache",
            variable=self.use_cache_var,
        )
        cache_check.pack(side="left", padx=(0, 15))

        self.export_images_var = ctk.BooleanVar(value=False)
        images_check = ctk.CTkCheckBox(
            options_frame,
            text="Export images",
            variable=self.export_images_var,
        )
        images_check.pack(side="left")

    def _build_file_list(self, parent):
        """Build the scrollable list of queued files and its header row."""
        list_frame = ctk.CTkFrame(parent)
        list_frame.pack(fill="both", expand=True, pady=(0, 15))

        # Header
        list_header = ctk.CTkFrame(list_frame, fg_color="transparent")
        list_header.pack(fill="x", padx=15, pady=(10, 5))

        ctk.CTkLabel(
            list_header,
            text="Files to Convert",
            font=ctk.CTkFont(size=14, weight="bold"),
        ).pack(side="left")

        clear_btn = ctk.CTkButton(
            list_header,
            text="Clear All",
            width=80,
            height=28,
            command=self._clear_files,
        )
        clear_btn.pack(side="right")

        # Scrollable file list
        self.file_list = ctk.CTkScrollableFrame(list_frame, height=150)
        self.file_list.pack(fill="both", expand=True, padx=10, pady=(5, 10))

        # One row widget per queued file, in the same order as input_files.
        self.file_widgets: List[ctk.CTkFrame] = []

        # Empty state
        self.empty_label = ctk.CTkLabel(
            self.file_list,
            text="No files added yet",
            text_color="gray",
        )
        self.empty_label.pack(pady=30)

    def _build_progress(self, parent):
        """Build the progress label and progress bar."""
        progress_frame = ctk.CTkFrame(parent, fg_color="transparent")
        progress_frame.pack(fill="x", pady=(0, 15))

        self.progress_label = ctk.CTkLabel(
            progress_frame,
            text="Ready to convert",
        )
        self.progress_label.pack(anchor="w")

        self.progress_bar = ctk.CTkProgressBar(progress_frame)
        self.progress_bar.pack(fill="x", pady=(5, 0))
        self.progress_bar.set(0)

    def _build_buttons(self, parent):
        """Build the "Convert All" and "Open Output" buttons."""
        btn_frame = ctk.CTkFrame(parent, fg_color="transparent")
        btn_frame.pack(fill="x")

        self.convert_btn = ctk.CTkButton(
            btn_frame,
            text="🚀 Convert All",
            font=ctk.CTkFont(size=16, weight="bold"),
            height=45,
            command=self._start_conversion,
        )
        self.convert_btn.pack(side="left", expand=True, fill="x", padx=(0, 10))

        self.open_folder_btn = ctk.CTkButton(
            btn_frame,
            text="📂 Open Output",
            height=45,
            width=150,
            fg_color="transparent",
            border_width=2,
            command=self._open_output_folder,
        )
        self.open_folder_btn.pack(side="right")

    def _build_status(self, parent):
        """Build the status bar: app status on the left, cache stats on the right."""
        status_frame = ctk.CTkFrame(parent, fg_color="transparent", height=30)
        status_frame.pack(fill="x", pady=(15, 0))

        self.status_label = ctk.CTkLabel(
            status_frame,
            text=f"thinkpdf v{__version__} | Ready",
            text_color="gray",
        )
        self.status_label.pack(side="left")

        # Read once at start-up; the label is not refreshed afterwards.
        cache_stats = self.cache.get_stats()
        cache_label = ctk.CTkLabel(
            status_frame,
            text=f"Cache: {cache_stats['entries']} files, {cache_stats['total_size_mb']:.1f} MB",
            text_color="gray",
        )
        cache_label.pack(side="right")

    def _setup_dnd(self):
        """Placeholder for drag-and-drop support; currently does nothing."""
        # For now, just use file browser
        # TODO: Add proper DnD with tkinterdnd2
        pass

    def _browse_files(self, event=None):
        """Open a file dialog and queue every selected file.

        ``event`` is accepted (and ignored) so the method can be used
        directly as a Tk ``<Button-1>`` binding.
        """
        files = filedialog.askopenfilenames(
            title="Select PDF files",
            filetypes=[("PDF files", "*.pdf"), ("All files", "*.*")],
        )

        # An empty selection (dialog cancelled) simply queues nothing.
        for f in files:
            self._add_file(Path(f))

    @staticmethod
    def _file_size(path: Path) -> int:
        """Return the size of *path* in bytes, or 0 if it cannot be read."""
        try:
            return path.stat().st_size
        except OSError:
            return 0

    def _add_file(self, file_path: Path):
        """Queue *file_path* and add a row for it to the file list.

        Paths already queued are ignored.  A path whose size cannot be read
        (missing file, permission problem) is skipped and reported in the
        progress label, leaving the queue and the list untouched.
        """
        if file_path in self.input_files:
            return

        # Stat before mutating any state so that a failure cannot leave a
        # queued path without a row (or a half-built row) behind.
        try:
            size_mb = file_path.stat().st_size / (1024 * 1024)
        except OSError:
            self.progress_label.configure(
                text=f"⚠️ Skipped unreadable file: {file_path.name}"
            )
            return

        self.input_files.append(file_path)

        # Hide empty label
        self.empty_label.pack_forget()

        # Create file widget
        file_frame = ctk.CTkFrame(self.file_list)
        file_frame.pack(fill="x", pady=2)

        # File icon and name
        ctk.CTkLabel(
            file_frame,
            text=f"📄 {file_path.name}",
            anchor="w",
        ).pack(side="left", padx=10, pady=8)

        # Size
        ctk.CTkLabel(
            file_frame,
            text=f"{size_mb:.1f} MB",
            text_color="gray",
        ).pack(side="right", padx=10)

        self.file_widgets.append(file_frame)
        self._update_status()

    def _clear_files(self):
        """Remove every queued file and restore the empty-list placeholder."""
        self.input_files.clear()

        for widget in self.file_widgets:
            widget.destroy()
        self.file_widgets.clear()

        self.empty_label.pack(pady=30)
        self._update_status()

    def _toggle_theme(self, value):
        """Switch appearance mode based on the segmented-button label *value*."""
        mode = "light" if "Light" in value else "dark"
        ctk.set_appearance_mode(mode)
        # Keep the state variable in step with what is actually displayed.
        self.theme_var.set(mode)

    def _on_quality_change(self, value):
        """Translate the decorated button label *value* into a quality name."""
        if "Fast" in value:
            self.quality_var.set("fast")
        elif "Maximum" in value:
            self.quality_var.set("maximum")
        else:
            self.quality_var.set("balanced")

    def _update_status(self):
        """Refresh the status label with the queued file count and total size."""
        count = len(self.input_files)
        if count == 0:
            self.status_label.configure(text=f"thinkpdf v{__version__} | Ready")
            return

        # A file deleted after it was queued counts as 0 bytes instead of
        # raising from inside a UI callback.
        total_bytes = sum(self._file_size(f) for f in self.input_files)
        total_size = total_bytes / (1024 * 1024)
        self.status_label.configure(
            text=f"thinkpdf v{__version__} | {count} files ({total_size:.1f} MB)"
        )

    def _start_conversion(self):
        """Validate the queue and launch the background conversion thread."""
        if not self.input_files:
            messagebox.showwarning("No files", "Please add PDF files to convert.")
            return

        if self.is_converting:
            return

        self.is_converting = True
        self.convert_btn.configure(state="disabled", text="Converting...")

        # Snapshot the queue and the Tk variables here, on the main thread:
        # the worker must not touch Tk variables, and the user may edit the
        # queue ("Clear All", adding files) while the conversion is running.
        worker = threading.Thread(
            target=self._convert_files,
            args=(
                list(self.input_files),
                self.quality_var.get(),
                self.export_images_var.get(),
                self.use_cache_var.get(),
            ),
            # Do not keep the process alive if the window is closed mid-run.
            daemon=True,
        )
        worker.start()

    def _convert_files(self, files=None, quality=None, export_images=None, use_cache=None):
        """Convert all files (runs in a background thread).

        Each PDF is converted to a ``.md`` file next to it.  Per-file errors
        are printed, collected and reported at the end; they do not stop the
        remaining files.  All widget updates are scheduled on the Tk main
        loop via ``root.after``.

        Args:
            files: Paths to convert.  Defaults to a copy of ``input_files``.
            quality: Quality name passed to ``ConversionOptions``.  Defaults
                to the current value of ``quality_var``.
            export_images: Passed to ``ConversionOptions``.  Defaults to the
                current value of ``export_images_var``.
            use_cache: Whether to store each result in the cache.  Defaults
                to the current value of ``use_cache_var``.
        """
        try:
            # Fallbacks keep a bare ``_convert_files()`` call working.
            if files is None:
                files = list(self.input_files)
            if quality is None:
                quality = self.quality_var.get()
            if export_images is None:
                export_images = self.export_images_var.get()
            if use_cache is None:
                use_cache = self.use_cache_var.get()

            options = ConversionOptions(
                quality=quality,
                export_images=export_images,
            )

            total = len(files)
            success_count = 0
            failures: List[str] = []

            for index, pdf_path in enumerate(files):
                # Pass values as arguments rather than through a lambda so
                # each callback sees this iteration's file, not whatever the
                # loop variable holds when the callback finally runs.
                self.root.after(0, self._update_progress, index, total, pdf_path.name)

                try:
                    converter = PDFConverter(options=options)
                    output_path = pdf_path.with_suffix(".md")
                    result = converter.convert(pdf_path, output_path=output_path)

                    # Cache if enabled
                    # NOTE(review): the cache is only written here, never
                    # consulted before converting - confirm that is intended.
                    if use_cache:
                        self.cache.cache(pdf_path, result.markdown)

                    success_count += 1

                except Exception as e:
                    print(f"Error converting {pdf_path.name}: {e}")
                    failures.append(f"{pdf_path.name}: {e}")

            # Done
            self.root.after(0, self._conversion_complete, success_count, total, failures)

        except Exception as e:
            # Render the message now: the name bound by ``except ... as`` is
            # deleted when this handler exits, before the callback runs.
            message = str(e)
            self.root.after(0, messagebox.showerror, "Error", message)

        finally:
            self.root.after(0, self._conversion_finished)

    def _conversion_finished(self):
        """Clear the busy flag and re-enable the convert button (main thread)."""
        self.is_converting = False
        self.convert_btn.configure(
            state="normal",
            text="🚀 Convert All",
        )

    def _update_progress(self, current: int, total: int, filename: str):
        """Show that file number ``current + 1`` of *total* is being converted.

        Args:
            current: Zero-based index of the file being converted.
            total: Number of files in this run (must be non-zero).
            filename: Name shown in the progress label.
        """
        progress = (current + 1) / total
        self.progress_bar.set(progress)
        self.progress_label.configure(text=f"Converting: {filename} ({current + 1}/{total})")

    def _conversion_complete(self, success: int, total: int,
                             failures: Optional[List[str]] = None):
        """Report the outcome of a conversion run.

        Args:
            success: Number of files converted without error.
            total: Number of files attempted.
            failures: Optional ``"name: error"`` lines for the files that
                failed; the first few are appended to the warning dialog.
        """
        self.progress_bar.set(1.0)
        self.progress_label.configure(
            text=f"✅ Completed: {success}/{total} files converted successfully"
        )

        if success == total:
            messagebox.showinfo(
                "Conversion Complete",
                f"Successfully converted {total} files!"
            )
            return

        details = ""
        if failures:
            shown = failures[:self._MAX_FAILURES_SHOWN]
            details = "\n\n" + "\n".join(shown)
            hidden = len(failures) - len(shown)
            if hidden > 0:
                details += f"\n... and {hidden} more"

        messagebox.showwarning(
            "Conversion Complete",
            f"Converted {success}/{total} files.\n"
            f"{total - success} files had errors."
            f"{details}"
        )

    def _open_output_folder(self):
        """Open the folder of the first queued file in the system file manager.

        Output files are written next to their source PDFs, so the first
        file's parent directory is used.  Does nothing when the queue is
        empty.
        """
        if not self.input_files:
            return

        folder = str(self.input_files[0].parent)
        try:
            if sys.platform.startswith("win"):
                # os.startfile only exists on Windows.
                os.startfile(folder)
            else:
                import subprocess

                opener = "open" if sys.platform == "darwin" else "xdg-open"
                subprocess.Popen([opener, folder])
        except OSError as exc:
            # Covers a missing opener executable as well as a missing folder.
            messagebox.showerror("Error", f"Could not open folder:\n{exc}")

    def run(self):
        """Enter the Tk main loop (no-op when CustomTkinter is unavailable)."""
        if HAS_CTK:
            self.root.mainloop()
521
+
522
+
523
def main():
    """Build the GUI application and hand control to its event loop."""
    thinkpdfApp().run()


# Allow launching the GUI by executing this module directly.
if __name__ == "__main__":
    main()
pdfbrain/cache/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Cache management module."""
2
+
3
+ from .cache_manager import CacheManager
4
+
5
+ __all__ = ["CacheManager"]