supervertaler 1.9.163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. Supervertaler.py +48473 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1911 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +351 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1176 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.163.dist-info/METADATA +906 -0
  81. supervertaler-1.9.163.dist-info/RECORD +85 -0
  82. supervertaler-1.9.163.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.163.dist-info/top_level.txt +2 -0
@@ -0,0 +1,900 @@
1
+ """
2
+ Tracked Changes Management Module
3
+
4
+ This module handles tracked changes from DOCX files or TSV files.
5
+ Provides AI with examples of preferred editing patterns to learn translator style.
6
+
7
+ Classes:
8
+ - TrackedChangesAgent: Manages tracked changes data and provides search/filtering
9
+ - TrackedChangesBrowser: UI window for browsing and analyzing tracked changes
10
+ """
11
+
12
+ import tkinter as tk
13
+ from tkinter import ttk, filedialog, messagebox
14
+ import os
15
+ import re
16
+ import queue
17
+ from datetime import datetime
18
+ from typing import List, Tuple, Dict, Optional
19
+
20
+ # parse_docx_pairs is deliberately not imported here: it is a standalone function
21
+ # that lives in the main application file. Callers pass it in explicitly as the
22
+ # parse_docx_pairs_func argument of TrackedChangesAgent.load_docx_changes().
23
+
24
+
25
class TrackedChangesAgent:
    """
    Manages tracked changes from DOCX files or TSV files.
    Provides AI with examples of preferred editing patterns to learn translator style.

    Change pairs are held in memory as (original_text, final_text) tuples.
    Load failures are written to the log callback and also shown to the user
    in a tkinter error dialog.
    """

    def __init__(self, log_callback=None):
        """Create an empty agent.

        Args:
            log_callback: Callable that receives each log message as a single
                string. Defaults to print.
        """
        self.change_data = []  # List of (original_text, final_text) tuples
        self.files_loaded = []  # Base names of the files loaded so far
        self.log_callback = log_callback or print

    def log(self, message):
        """Pass a message to the log callback; ignored if it is not callable."""
        if callable(self.log_callback):
            self.log_callback(message)

    def _report_load_error(self, path, error):
        """Log a load failure and show it to the user in an error dialog."""
        self.log(f"[Tracked Changes] Error loading {path}: {error}")
        # A real newline here: the escaped form showed a literal "\n" in the dialog.
        messagebox.showerror(
            "Tracked Changes Error",
            f"Failed to load tracked changes from {os.path.basename(path)}:\n{error}")

    @staticmethod
    def _significant_words(text):
        """Return the lower-cased words of text that are longer than 3 characters."""
        return {word.lower() for word in text.split() if len(word) > 3}

    def load_docx_changes(self, docx_path, parse_docx_pairs_func):
        """Load tracked changes from a DOCX file, replacing anything loaded before.

        Args:
            docx_path: Path to DOCX file
            parse_docx_pairs_func: Function to parse DOCX and extract change pairs

        Returns:
            True on success; False when docx_path is empty or parsing failed.
        """
        if not docx_path:
            return False

        file_name = os.path.basename(docx_path)
        self.log(f"[Tracked Changes] Loading changes from: {file_name}")

        try:
            # Parse first, so a failure leaves the previously loaded data intact.
            # list() also lets the parser return any iterable.
            new_changes = list(parse_docx_pairs_func(docx_path))

            # Clear existing changes to prevent duplicates
            self.change_data.clear()
            self.files_loaded.clear()

            self.change_data.extend(new_changes)
            self.files_loaded.append(file_name)

            self.log(f"[Tracked Changes] Loaded {len(new_changes)} change pairs from {file_name}")
            self.log(f"[Tracked Changes] Total change pairs available: {len(self.change_data)}")
            return True
        except Exception as e:
            self._report_load_error(docx_path, e)
            return False

    def load_tsv_changes(self, tsv_path):
        """Load tracked changes from a TSV file (original_text<tab>final_text format).

        Pairs are appended to whatever is already loaded. Blank lines are
        skipped, as is a first line containing both "original" and "final"
        (treated as a header). Rows whose two columns are empty or identical
        are dropped; rows with fewer than two columns are logged and skipped.

        Returns:
            True on success; False when tsv_path is empty or reading failed.
        """
        if not tsv_path:
            return False

        file_name = os.path.basename(tsv_path)
        self.log(f"[Tracked Changes] Loading changes from: {file_name}")

        try:
            new_changes = []
            # utf-8-sig reads plain UTF-8 too, but drops a leading BOM so it
            # cannot end up inside the first original text of a headerless file.
            with open(tsv_path, 'r', encoding='utf-8-sig') as f:
                for line_num, line in enumerate(f, 1):
                    line = line.rstrip('\n\r')
                    if not line.strip():
                        continue

                    # Skip header line if it looks like one
                    lowered = line.lower()
                    if line_num == 1 and 'original' in lowered and 'final' in lowered:
                        continue

                    parts = line.split('\t')
                    if len(parts) < 2:
                        self.log(f"[Tracked Changes] Skipping line {line_num}: insufficient columns")
                        continue

                    original = parts[0].strip()
                    final = parts[1].strip()
                    # Only keep rows that describe an actual change
                    if original and final and original != final:
                        new_changes.append((original, final))

            # Add to existing changes
            self.change_data.extend(new_changes)
            self.files_loaded.append(file_name)

            self.log(f"[Tracked Changes] Loaded {len(new_changes)} change pairs from {file_name}")
            self.log(f"[Tracked Changes] Total change pairs available: {len(self.change_data)}")
            return True
        except Exception as e:
            self._report_load_error(tsv_path, e)
            return False

    def clear_changes(self):
        """Clear all loaded tracked changes"""
        self.change_data.clear()
        self.files_loaded.clear()
        self.log("[Tracked Changes] All tracked changes cleared")

    def search_changes(self, search_text, exact_match=False):
        """Search for changes containing the search text.

        Args:
            search_text: Text to look for. None, empty or whitespace-only
                means "no filter".
            exact_match: When True, a pair matches only if search_text equals
                its original or final text exactly (case-sensitive). When
                False, a case-insensitive substring test is used.

        Returns:
            A new list of matching (original, final) pairs, in load order.
        """
        if not search_text or not search_text.strip():
            # Return a copy so callers cannot mutate the agent's data by accident.
            return list(self.change_data)

        if exact_match:
            return [(original, final) for original, final in self.change_data
                    if search_text == original or search_text == final]

        needle = search_text.lower()
        return [(original, final) for original, final in self.change_data
                if needle in original.lower() or needle in final.lower()]

    def find_relevant_changes(self, source_segments, max_changes=10):
        """
        Find tracked changes relevant to the current source segments being processed.
        Uses two-pass algorithm: exact matches first, then partial word overlap.

        Args:
            source_segments: Iterable of segment strings.
            max_changes: Maximum number of pairs to return.

        Returns:
            Up to max_changes unique (original, final) pairs, exact matches first.
        """
        if not self.change_data or not source_segments or max_changes <= 0:
            return []

        pairs = [(original, final) for original, final in self.change_data]
        relevant_changes = []
        seen = set()  # O(1) duplicate check instead of scanning the result list

        def add(pair):
            """Record pair once; return True when the result list is full."""
            if pair not in seen:
                seen.add(pair)
                relevant_changes.append(pair)
            return len(relevant_changes) >= max_changes

        # First pass: exact matches (case-insensitive, surrounding whitespace ignored)
        exact_keys = [original.lower().strip() for original, _ in pairs]
        for segment in source_segments:
            segment_key = segment.lower().strip()
            for pair, key in zip(pairs, exact_keys):
                if key == segment_key and add(pair):
                    return relevant_changes

        # Second pass: partial matches (word overlap)
        pair_words = [self._significant_words(original) for original, _ in pairs]
        for segment in source_segments:
            segment_words = self._significant_words(segment)
            if not segment_words:
                continue
            # Require at least one shared word. Without the max(1, ...) a segment
            # with a single significant word gave a threshold of 0, which every
            # change pair satisfied.
            min_overlap = max(1, min(2, len(segment_words) // 2))
            for pair, words in zip(pairs, pair_words):
                if pair in seen or not words:
                    continue
                if len(segment_words & words) >= min_overlap and add(pair):
                    return relevant_changes

        return relevant_changes

    def get_entry_count(self):
        """Get number of loaded change pairs"""
        return len(self.change_data)
183
+
184
+
185
+ class TrackedChangesBrowser:
186
+ """Browser UI for viewing and searching tracked changes"""
187
+
188
+ def __init__(self, parent, tracked_changes_agent, parent_app=None, log_queue=None,
189
+ gemini_available=False, anthropic_available=False, openai_available=False, app_version="3.6.0"):
190
+ self.parent = parent
191
+ self.tracked_changes_agent = tracked_changes_agent
192
+ self.parent_app = parent_app # Reference to main app for AI settings
193
+ self.log_queue = log_queue if log_queue else queue.Queue()
194
+ self.window = None
195
+
196
+ # AI availability flags
197
+ self.GEMINI_AVAILABLE = gemini_available
198
+ self.ANTHROPIC_AVAILABLE = anthropic_available
199
+ self.OPENAI_AVAILABLE = openai_available
200
+ self.APP_VERSION = app_version
201
+
202
def show_browser(self):
    """Open the browser window, or raise it if it is already open.

    Shows an informational dialog and does nothing else when the agent
    has no change pairs loaded.
    """
    if not self.tracked_changes_agent.change_data:
        messagebox.showinfo("No Changes", "No tracked changes loaded. Load a DOCX or TSV file with tracked changes first.")
        return

    window_alive = self.window is not None and self.window.winfo_exists()
    if window_alive:
        # Already open: just bring it to the front
        self.window.lift()
        return

    # Never created, or closed by the user: build it
    self.create_window()
213
+
214
def create_window(self):
    """Build the browser Toplevel and fill it with every loaded change pair.

    Creates, from top to bottom: the search bar, a results counter, a
    two-column Treeview with scrollbars, two read-only detail panes, the
    export button row and a status line naming the loaded files. Widgets
    that other methods use are stored on self: window, search_var,
    exact_match_var, results_label, tree, original_text, final_text and
    context_menu.
    """
    self.window = tk.Toplevel(self.parent)
    # The pair count in the title is computed once, when the window is built.
    self.window.title(f"Tracked Changes Browser ({len(self.tracked_changes_agent.change_data)} pairs)")
    self.window.geometry("900x700")  # Taller to accommodate detail view

    # Search frame
    search_frame = tk.Frame(self.window)
    search_frame.pack(fill=tk.X, padx=10, pady=5)

    tk.Label(search_frame, text="Search:").pack(side=tk.LEFT)
    self.search_var = tk.StringVar()
    search_entry = tk.Entry(search_frame, textvariable=self.search_var, width=40)
    search_entry.pack(side=tk.LEFT, padx=(5,0))
    # Re-run the search after every key release (search-as-you-type)
    search_entry.bind('<KeyRelease>', self.on_search)

    self.exact_match_var = tk.BooleanVar()
    # Toggling the checkbox re-runs the search with the new mode
    tk.Checkbutton(search_frame, text="Exact match", variable=self.exact_match_var,
                   command=self.on_search).pack(side=tk.LEFT, padx=(10,0))

    tk.Button(search_frame, text="Clear", command=self.clear_search).pack(side=tk.LEFT, padx=(10,0))

    # Results info (text is filled in by load_results)
    self.results_label = tk.Label(self.window, text="")
    self.results_label.pack(pady=2)

    # Main content frame (results + detail)
    main_frame = tk.Frame(self.window)
    main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)

    # Results frame with scrollbar (top half)
    results_frame = tk.Frame(main_frame)
    results_frame.pack(fill=tk.BOTH, expand=True)

    # Create Treeview for displaying changes
    columns = ('Original', 'Final')
    self.tree = ttk.Treeview(results_frame, columns=columns, show='headings', height=12)

    # Define headings
    self.tree.heading('Original', text='Original Text')
    self.tree.heading('Final', text='Final Text')

    # Configure column widths
    self.tree.column('Original', width=400)
    self.tree.column('Final', width=400)

    # Add scrollbars
    v_scrollbar = ttk.Scrollbar(results_frame, orient=tk.VERTICAL, command=self.tree.yview)
    h_scrollbar = ttk.Scrollbar(results_frame, orient=tk.HORIZONTAL, command=self.tree.xview)
    self.tree.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set)

    # Lay out tree and scrollbars with grid inside results_frame
    self.tree.grid(row=0, column=0, sticky="nsew")
    v_scrollbar.grid(row=0, column=1, sticky="ns")
    h_scrollbar.grid(row=1, column=0, sticky="ew")

    # Only the tree cell grows when the window is resized
    results_frame.grid_rowconfigure(0, weight=1)
    results_frame.grid_columnconfigure(0, weight=1)

    # Detail view frame (bottom half); expand=False keeps it at its natural height
    detail_frame = tk.LabelFrame(main_frame, text="Selected Change Details", padx=5, pady=5)
    detail_frame.pack(fill=tk.BOTH, expand=False, pady=(10,0))

    # Original text display (kept disabled so the user cannot edit it)
    tk.Label(detail_frame, text="Original Text:", font=("Segoe UI", 10, "bold")).pack(anchor="w")
    self.original_text = tk.Text(detail_frame, height=4, wrap=tk.WORD, state="disabled",
                                 bg="#f8f8f8", relief="solid", borderwidth=1)
    self.original_text.pack(fill=tk.X, pady=(2,5))

    # Final text display (kept disabled so the user cannot edit it)
    tk.Label(detail_frame, text="Final Text:", font=("Segoe UI", 10, "bold")).pack(anchor="w")
    self.final_text = tk.Text(detail_frame, height=4, wrap=tk.WORD, state="disabled",
                              bg="#f0f8ff", relief="solid", borderwidth=1)
    self.final_text.pack(fill=tk.X, pady=(2,0))

    # Bind selection event
    self.tree.bind('<<TreeviewSelect>>', self.on_selection_change)

    # Context menu for copying
    self.context_menu = tk.Menu(self.window, tearoff=0)
    self.context_menu.add_command(label="Copy original", command=self.copy_original)
    self.context_menu.add_command(label="Copy final", command=self.copy_final)
    self.context_menu.add_command(label="Copy both", command=self.copy_both)

    self.tree.bind("<Button-3>", self.show_context_menu)  # Right click

    # Export button frame
    export_frame = tk.Frame(self.window)
    export_frame.pack(fill=tk.X, padx=10, pady=5)

    tk.Button(export_frame, text="📊 Export Report (MD)", command=self.export_to_md_report,
              bg="#4CAF50", fg="white", font=("Segoe UI", 10, "bold"),
              relief="raised", padx=10, pady=5).pack(side=tk.LEFT)

    tk.Label(export_frame, text="Export tracked changes report with AI-powered change analysis",
             fg="gray").pack(side=tk.LEFT, padx=(10,0))

    # Status bar
    status_frame = tk.Frame(self.window)
    status_frame.pack(fill=tk.X, padx=10, pady=2)

    # The file list is read once here, when the window is built
    files_text = f"Files loaded: {', '.join(self.tracked_changes_agent.files_loaded)}" if self.tracked_changes_agent.files_loaded else "No files loaded"
    tk.Label(status_frame, text=files_text, anchor=tk.W).pack(fill=tk.X)

    # Load all changes initially
    self.load_results(self.tracked_changes_agent.change_data)
320
+
321
def on_selection_change(self, event=None):
    """Refresh the two detail panes to match the current tree selection.

    With no selection, or when the selected row cannot be resolved to a
    change pair, both panes are cleared so they never keep showing text
    from a previous selection. A pair with one empty side is shown as-is,
    with that pane left empty.

    Args:
        event: Tk event object (unused; present so this can be a binding).
    """
    def show(widget, text):
        # The panes are kept disabled so the user cannot edit them; they
        # must be enabled briefly for delete/insert to have any effect.
        widget.config(state="normal")
        widget.delete("1.0", tk.END)
        if text:
            widget.insert("1.0", text)
        widget.config(state="disabled")

    original, final = None, None
    if self.tree.selection():
        # Get the selected change pair; (None, None) when it cannot be resolved
        original, final = self.get_selected_change()

    show(self.original_text, original)
    show(self.final_text, final)
348
+
349
def on_search(self, event=None):
    """Re-run the search from the current widget state and show the matches.

    Args:
        event: Tk event object (unused; present so this can be a binding).
    """
    matches = self.tracked_changes_agent.search_changes(
        self.search_var.get(),
        self.exact_match_var.get(),
    )
    self.load_results(matches)
356
+
357
def clear_search(self):
    """Empty the search box and list every loaded change pair again."""
    self.search_var.set("")
    every_pair = self.tracked_changes_agent.change_data
    self.load_results(every_pair)
361
+
362
def load_results(self, results):
    """Replace the tree rows with results and update the counter label.

    Args:
        results: Sequence of (original, final) pairs to display. Each
            text longer than 100 characters is cut to 100 and suffixed
            with "..." for display only.
    """
    def shorten(text, limit=100):
        # Truncate long text for display
        return text if len(text) <= limit else text[:limit] + "..."

    # Drop whatever rows are currently shown
    self.tree.delete(*self.tree.get_children())

    # One row per pair, in the order given
    for original, final in results:
        self.tree.insert('', 'end', values=(shorten(original), shorten(final)))

    # Counter: "all N" when nothing is filtered out, otherwise "X of N"
    total_changes = len(self.tracked_changes_agent.change_data)
    showing = len(results)
    label_text = (
        f"Showing all {total_changes} change pairs"
        if showing == total_changes
        else f"Showing {showing} of {total_changes} change pairs"
    )
    self.results_label.config(text=label_text)
383
+
384
def show_context_menu(self, event):
    """Select the row under the pointer and pop up the copy menu there.

    Nothing happens when the click is not over a row.

    Args:
        event: Tk mouse event; y locates the row, x_root/y_root place the menu.
    """
    item = self.tree.identify_row(event.y)
    if not item:
        return

    # Select the clicked row so the copy commands act on it
    self.tree.selection_set(item)
    try:
        # tk_popup rather than post: it handles dismissal when the user
        # clicks elsewhere, which a bare post() does not do on every platform.
        self.context_menu.tk_popup(event.x_root, event.y_root)
    finally:
        # Always give the grab back so the rest of the UI stays responsive.
        self.context_menu.grab_release()
390
+
391
def get_selected_change(self):
    """Return the (original, final) pair for the first selected tree row.

    The tree only holds truncated display text, so the row position is
    looked up in a fresh search made with the current search settings.

    Returns:
        The matching pair, or (None, None) when nothing is selected or the
        row position falls outside the current results.
    """
    picked = self.tree.selection()
    if not picked:
        return None, None

    row = self.tree.index(picked[0])

    # Get current results (might be filtered)
    visible = self.tracked_changes_agent.search_changes(
        self.search_var.get(),
        self.exact_match_var.get(),
    )

    if row < 0 or row >= len(visible):
        return None, None
    return visible[row]
408
+
409
def copy_original(self):
    """Put the selected pair's original text on the clipboard.

    The clipboard is left untouched when nothing is selected or the
    original text is empty.
    """
    text, _unused = self.get_selected_change()
    if not text:
        return
    clipboard_owner = self.window
    clipboard_owner.clipboard_clear()
    clipboard_owner.clipboard_append(text)
415
+
416
def copy_final(self):
    """Put the selected pair's final text on the clipboard.

    The clipboard is left untouched when nothing is selected or the
    final text is empty.
    """
    _unused, text = self.get_selected_change()
    if not text:
        return
    clipboard_owner = self.window
    clipboard_owner.clipboard_clear()
    clipboard_owner.clipboard_append(text)
422
+
423
def copy_both(self):
    """Put both texts of the selected pair on the clipboard, labelled.

    The clipboard is left untouched unless both texts are non-empty.
    """
    original, final = self.get_selected_change()
    if not (original and final):
        return
    clipboard_owner = self.window
    clipboard_owner.clipboard_clear()
    clipboard_owner.clipboard_append(f"Original: {original}\n\nFinal: {final}")
430
+
431
+
432
def export_to_md_report(self):
    """Export tracked changes to a Markdown report with AI-powered change analysis.

    Interactive flow, in order:
      1. If a search filter is active, ask whether to export the filtered
         results or everything (Cancel aborts).
      2. Ask for the output file (an empty choice aborts).
      3. Ask whether to generate AI change summaries; if yes, show a modal
         dialog for the batch size (1-100, default 25).
      4. Build the report text, calling get_ai_change_summaries_batch once
         per batch when AI analysis is enabled, and write it as UTF-8.

    Any exception raised while building or writing the report is shown in
    an error dialog and put on log_queue; nothing is re-raised.
    """
    if not self.tracked_changes_agent.change_data:
        messagebox.showwarning("No Data", "No tracked changes available to export.")
        return

    # Ask user whether to export all or filtered results
    search_text = self.search_var.get()
    if search_text:
        # User has active search filter
        result = messagebox.askyesnocancel(
            "Export Scope",
            f"You have an active search filter showing {len(self.tree.get_children())} of {len(self.tracked_changes_agent.change_data)} changes.\n\n"
            "Yes = Export filtered results only\n"
            "No = Export all tracked changes\n"
            "Cancel = Cancel export"
        )
        if result is None:  # Cancel
            return
        export_filtered = result
    else:
        export_filtered = False

    # Get the data to export
    if export_filtered:
        # Re-run the search instead of reading the tree, which only holds
        # truncated display text
        exact_match = self.exact_match_var.get()
        data_to_export = self.tracked_changes_agent.search_changes(search_text, exact_match)
        default_filename = "tracked_changes_filtered_report.md"
    else:
        data_to_export = self.tracked_changes_agent.change_data
        default_filename = "tracked_changes_report.md"

    # Ask for save location
    filepath = filedialog.asksaveasfilename(
        title="Export Tracked Changes Report",
        defaultextension=".md",
        filetypes=(("Markdown files", "*.md"), ("All files", "*.*")),
        initialfile=default_filename
    )

    if not filepath:
        return

    # Ask if user wants AI analysis
    ai_analysis = messagebox.askyesno(
        "AI Analysis",
        f"Generate AI-powered change summaries?\n\n"
        f"This will analyze {len(data_to_export)} changes using the currently selected AI model.\n\n"
        f"Note: This may take a few minutes and will use API credits.\n\n"
        f"Click 'No' to export without AI analysis."
    )

    # If AI analysis enabled, let user choose batch size
    batch_size = 25  # Default; also what is used if the dialog is closed without pressing OK
    if ai_analysis:
        batch_dialog = tk.Toplevel(self.window)
        batch_dialog.title("Batch Size Configuration")
        batch_dialog.geometry("450x280")
        batch_dialog.transient(self.window)
        batch_dialog.grab_set()  # Modal: block the browser until the dialog closes

        tk.Label(batch_dialog, text="Configure Batch Processing",
                 font=("Segoe UI", 11, "bold")).pack(pady=10)
        tk.Label(batch_dialog,
                 text=f"Choose how many segments to process per AI request\n"
                      f"Larger batches = faster but more tokens per request",
                 font=("Segoe UI", 9)).pack(pady=5)

        # Slider for batch size
        batch_var = tk.IntVar(value=25)

        slider_frame = tk.Frame(batch_dialog)
        slider_frame.pack(pady=10, fill='x', padx=20)

        tk.Label(slider_frame, text="Batch Size:", font=("Segoe UI", 9)).pack(side='left')
        batch_label = tk.Label(slider_frame, text="25", font=("Segoe UI", 10, "bold"), fg="blue")
        batch_label.pack(side='right')

        # Scale passes its value as a string; mirror it in the label as an int
        def update_label(val):
            batch_label.config(text=str(int(float(val))))

        slider = tk.Scale(batch_dialog, from_=1, to=100, orient='horizontal',
                          variable=batch_var, command=update_label, length=350)
        slider.pack(pady=5)

        # Info label
        info_label = tk.Label(batch_dialog,
                              text=f"Total changes: {len(data_to_export)} | "
                                   f"Estimated batches at size 25: {(len(data_to_export) + 24) // 25}",
                              font=("Segoe UI", 8), fg="gray")
        info_label.pack(pady=5)

        # Recompute the batch estimate (ceiling division) whenever the slider value changes
        def update_info(*args):
            size = batch_var.get()
            batches = (len(data_to_export) + size - 1) // size
            info_label.config(text=f"Total changes: {len(data_to_export)} | "
                                   f"Estimated batches at size {size}: {batches}")

        batch_var.trace('w', update_info)

        # OK button: copy the chosen size into the enclosing scope, then close
        def on_ok():
            nonlocal batch_size
            batch_size = batch_var.get()
            batch_dialog.destroy()

        tk.Button(batch_dialog, text="OK", command=on_ok,
                  font=("Segoe UI", 10), width=15).pack(pady=10)

        # Wait for dialog to close
        batch_dialog.wait_window()

    try:
        # Prepare report content
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Build AI prompt info for report header
        ai_prompt_info = ""
        if ai_analysis and hasattr(self, 'parent_app') and self.parent_app:
            provider = self.parent_app.current_llm_provider
            model = self.parent_app.current_llm_model
            # Capitalize provider name for display
            provider_display = provider.capitalize()
            # NOTE(review): this prompt text is hand-copied into the report header;
            # confirm it still matches the prompt built in get_ai_change_summaries_batch.
            ai_prompt_info = f"""

### AI Analysis Configuration

**Provider:** {provider_display}
**Model:** {model}

**Prompt Template Used:**
```
You are a precision editor analyzing changes between two versions of text.
Compare the original and revised text and identify EXACTLY what changed.

CRITICAL INSTRUCTIONS:
- Be extremely specific and precise
- PAY SPECIAL ATTENTION to quote marks: " vs " vs " (curly vs straight)
- Check for apostrophe changes: ' vs ' (curly vs straight)
- Check for dash changes: - vs – vs — (hyphen vs en-dash vs em-dash)
- Quote the exact words/phrases that changed
- Use this format: "X" → "Y"
- For single word changes: quote both words
- For multiple changes: put each on its own line
- For punctuation/formatting: describe precisely
- For additions: "Added: [exact text]"
- For deletions: "Removed: [exact text]"
- DO NOT say "No change" unless texts are 100% identical
- DO NOT use vague terms like "clarified", "improved", "fixed"
- DO quote the actual changed text

Examples of single changes:
✓ "pre-cut" → "incision"
✓ Curly quotes → straight quotes: "word" → "word"
✓ Curly apostrophe → straight: don't → don't
✓ "package" → "packaging"

Examples of multiple changes (one per line):
✓ "split portions" → "divided portions"
"connected by a" → "connected, via a"
Curly quotes → straight quotes throughout
✓ Added: "carefully"
✓ "color" → "colour" (US to UK spelling)
✗ Clarified terminology (too vague)
✗ Fixed grammar (not specific)
✗ Improved word choice (not helpful)
```

---

"""

        # Report header: explanation, generation metadata and (optionally) the AI configuration
        md_content = f"""# Tracked Changes Analysis Report ([Supervertaler](https://github.com/michaelbeijer/Supervertaler) {self.APP_VERSION})

## What is this report?

This report analyzes the differences between AI-generated translations and your final edited versions exported from your CAT tool (memoQ, CafeTran, etc.). It shows exactly what you changed during post-editing, helping you review your editing decisions and track your translation workflow improvements.

**Use case:** After completing a translation project in your CAT tool with tracked changes enabled, export the bilingual document and load it here to see a detailed breakdown of all modifications made to the AI-generated baseline.

---

**Generated:** {timestamp}
**Total Changes:** {len(data_to_export)}
**Filter Applied:** {"Yes - " + search_text if export_filtered else "No"}
**AI Analysis:** {"Enabled" if ai_analysis else "Disabled"}
{ai_prompt_info}
"""

        # Process changes with paragraph format
        if ai_analysis:
            # Show progress window
            self.log_queue.put(f"[Export] Generating AI summaries for {len(data_to_export)} changes in batches...")

            progress_window = tk.Toplevel(self.window)
            progress_window.title("Generating AI Analysis...")
            progress_window.geometry("400x150")
            progress_window.transient(self.window)
            progress_window.grab_set()

            tk.Label(progress_window, text="Analyzing tracked changes with AI (batched)...",
                     font=("Segoe UI", 10)).pack(pady=10)
            progress_label = tk.Label(progress_window, text="Processing batch 0 of 0")
            progress_label.pack()
            batch_info_label = tk.Label(progress_window, text="", font=("Segoe UI", 8), fg="gray")
            batch_info_label.pack()

            # Process in batches (user-configured)
            # batch_size already set from dialog above
            total_batches = (len(data_to_export) + batch_size - 1) // batch_size
            all_summaries = {}  # 0-based index into data_to_export -> summary text

            for batch_num in range(total_batches):
                start_idx = batch_num * batch_size
                end_idx = min(start_idx + batch_size, len(data_to_export))
                batch = data_to_export[start_idx:end_idx]

                progress_label.config(text=f"Processing batch {batch_num + 1} of {total_batches}")
                batch_info_label.config(text=f"Segments {start_idx + 1}-{end_idx} of {len(data_to_export)}")
                # The AI call below runs on this (UI) thread, so repaint explicitly
                progress_window.update()

                # Generate AI summaries for this batch
                try:
                    batch_summaries = self.get_ai_change_summaries_batch(batch, start_idx)
                    all_summaries.update(batch_summaries)
                    self.log_queue.put(f"[Export] Batch {batch_num + 1}/{total_batches} complete ({len(batch)} segments)")
                except Exception as e:
                    self.log_queue.put(f"[Export] Error in batch {batch_num + 1}: {e}")
                    # Fill in error messages for failed batch
                    for i in range(start_idx, end_idx):
                        all_summaries[i] = f"_Error generating summary: {str(e)}_"

            # NOTE(review): this is the only place the progress window is destroyed;
            # an exception raised in the loop outside the inner try (e.g. from
            # progress_window.update()) would leave it open with its grab.
            progress_window.destroy()

            # Now build the markdown content with the summaries
            for i, (original, final) in enumerate(data_to_export):
                summary = all_summaries.get(i, "_No summary available_")

                # Add segment in paragraph format
                md_content += f"""### Segment {i + 1}

**Target (Original):**
{original}

**Target (Revised):**
{final}

**Change Summary:**
{summary}

---

"""
        else:
            # No AI analysis - simpler paragraph format
            for i, (original, final) in enumerate(data_to_export, 1):
                md_content += f"""### Segment {i}

**Target (Original):**
{original}

**Target (Revised):**
{final}

---

"""

        # Report footer
        md_content += f"""

---

## Summary Statistics

- **Total Segments Analyzed:** {len(data_to_export)}
- **AI Analysis:** {"Enabled" if ai_analysis else "Disabled"}
- **Export Type:** {"Filtered" if export_filtered else "Complete"}

*This report was generated by [Supervertaler](https://github.com/michaelbeijer/Supervertaler) {self.APP_VERSION}*
"""

        # Write to file
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(md_content)

        messagebox.showinfo(
            "Export Successful",
            f"Exported {len(data_to_export)} tracked changes to:\n{filepath}\n\n"
            + ("AI change summaries included." if ai_analysis else "Export completed without AI analysis.")
        )

        self.log_queue.put(f"[Export] Report saved to: {filepath}")

    except Exception as e:
        # Top-level UI boundary: report the failure instead of letting it propagate
        messagebox.showerror(
            "Export Error",
            f"Failed to export tracked changes report:\n{str(e)}"
        )
        self.log_queue.put(f"[Export] Error: {e}")
731
+
732
def get_ai_change_summaries_batch(self, changes_batch, start_index):
    """Get AI summaries for a batch of changes - much faster than one-by-one.

    All ``(original, final)`` pairs in ``changes_batch`` are sent to the
    currently selected LLM provider in a single prompt. The reply is expected
    to contain one ``[N] summary`` section per pair, numbered from 1.

    Args:
        changes_batch: Sequence of ``(original, final)`` text pairs.
        start_index: Index assigned to the first pair of the batch.

    Returns:
        A dict keyed by ``start_index .. start_index + len(changes_batch) - 1``
        whose values are summary strings. If there is no parent application,
        no API key, no usable provider, or the request/parsing raises, every
        key maps to a short explanatory message instead. An empty batch
        yields an empty dict without contacting any provider.
    """
    batch_size = len(changes_batch)
    if batch_size == 0:
        # Nothing to summarise: avoid a pointless (and billable) API call.
        return {}

    batch_indices = range(start_index, start_index + batch_size)

    if not getattr(self, 'parent_app', None):
        # Fallback for batch
        return {i: "Modified text" for i in batch_indices}

    try:
        provider = self.parent_app.current_llm_provider
        model_name = self.parent_app.current_llm_model

        # Debug logging
        self.log_queue.put(f"[Export] Using provider: {provider}, model: {model_name}")

        # Provider name -> name of its entry in parent_app.api_keys.
        key_names = {"claude": "claude", "gemini": "google", "openai": "openai"}
        api_key = ""
        if provider in key_names:
            api_key = self.parent_app.api_keys.get(key_names[provider], "")

        if not api_key:
            self.log_queue.put(f"[Export] ERROR: No API key found for provider: {provider}")
            return {i: "AI unavailable - no API key" for i in batch_indices}

        self.log_queue.put(f"[Export] API key found, calling {provider}...")

        # Build batch prompt with all changes
        prompt_parts = ["""You are a precision editor analyzing changes between multiple text versions.
For each numbered pair below, identify EXACTLY what changed.

CRITICAL INSTRUCTIONS:
- Be extremely specific and precise
- PAY SPECIAL ATTENTION to quote marks: " vs " vs " (curly vs straight)
- Check for apostrophe changes: ' vs ' (curly vs straight)
- Check for dash changes: - vs – vs — (hyphen vs en-dash vs em-dash)
- Quote the exact words/phrases that changed
- Use format: "X" → "Y"
- For multiple changes in one segment: put each on its own line
- For punctuation/formatting: describe precisely (e.g., 'Curly quotes → straight quotes: "word" → "word"')
- DO NOT say "No change" unless texts are 100% identical (byte-for-byte)
- DO NOT use vague terms like "clarified", "improved", "fixed"
- DO quote the actual changed text

IMPORTANT: If only punctuation changed (quotes, apostrophes, dashes), you MUST report it!

"""]

        # Add all changes to the prompt, numbered from 1 within the batch
        for i, (original, final) in enumerate(changes_batch):
            prompt_parts.append(f"""
[{i + 1}] ORIGINAL: {original}
REVISED: {final}

""")

        prompt_parts.append("""
Now provide the change summary for each segment, formatted as:

[1] your precise summary here
[2] your precise summary here
[3] your precise summary here

(etc. for all segments)""")

        batch_prompt = "".join(prompt_parts)

        # Call AI based on provider
        self.log_queue.put(f"[Export] Checking provider condition: {provider} == gemini? {provider == 'gemini'}, GEMINI_AVAILABLE? {self.GEMINI_AVAILABLE}")
        if provider == "gemini" and self.GEMINI_AVAILABLE:
            import google.generativeai as genai
            genai.configure(api_key=api_key)
            model = genai.GenerativeModel(model_name)

            response = model.generate_content(batch_prompt)
            response_text = response.text.strip()

        elif provider == "claude" and self.ANTHROPIC_AVAILABLE:
            import anthropic
            client = anthropic.Anthropic(api_key=api_key)

            message = client.messages.create(
                model=model_name,
                max_tokens=2000,  # Larger for batch
                messages=[{
                    "role": "user",
                    "content": batch_prompt
                }]
            )

            response_text = message.content[0].text.strip()

        elif provider == "openai" and self.OPENAI_AVAILABLE:
            import openai
            client = openai.OpenAI(api_key=api_key)

            response = client.chat.completions.create(
                model=model_name,
                max_tokens=2000,  # Larger for batch
                messages=[{
                    "role": "user",
                    "content": batch_prompt
                }]
            )

            response_text = response.choices[0].message.content.strip()
        else:
            self.log_queue.put(f"[Export] ERROR: No matching provider condition for {provider}")
            return {i: "Provider not available" for i in batch_indices}

        # Parse the response to extract individual summaries.
        # sections maps the 1-based segment number to its collected lines;
        # active is the list currently receiving continuation lines, or None
        # while no valid section is open.
        sections = {}
        active = None

        for raw_line in response_text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            # Check if line starts with [N]
            match = re.match(r'^\[(\d+)\]\s*(.*)$', line)
            if match:
                number = int(match.group(1))
                if 1 <= number <= batch_size:
                    first_line = match.group(2).strip()
                    # A repeated number replaces the earlier section.
                    active = sections[number] = [first_line] if first_line else []
                else:
                    # A number outside this batch would map to an index that
                    # belongs to another batch (or to nothing); drop the
                    # whole section instead of polluting the result.
                    active = None
            elif active is not None:
                # Continuation of current summary
                active.append(line)

        summaries = {
            start_index + number - 1: '\n'.join(lines).strip()
            for number, lines in sections.items()
        }

        # Fill in any missing summaries
        for index in batch_indices:
            summaries.setdefault(index, "_Summary not parsed correctly_")

        return summaries

    except Exception as e:
        # Best-effort: report the failure as the summary text so the export
        # can still complete.
        self.log_queue.put(f"[AI Batch] Error: {e}")
        return {i: f"Analysis failed: {str(e)}" for i in batch_indices}
881
+
882
+
883
def format_tracked_changes_context(tracked_changes_list, max_length=1000):
    """Render tracked changes as a short reference block for AI context.

    Each ``(original, final)`` pair becomes one bullet line under a fixed
    header. Bullets are added in order until the next one would push the
    running character count (header plus bullets, line breaks not counted)
    above ``max_length``; a truncation note is then appended, but only when
    at least one bullet made it in.

    Args:
        tracked_changes_list: Sequence of ``(original, final)`` text pairs.
        max_length: Character budget for the header and bullet lines.

    Returns:
        The newline-joined block with a trailing newline, or ``""`` when
        ``tracked_changes_list`` is empty or otherwise falsy.
    """
    if not tracked_changes_list:
        return ""

    header = "TRACKED CHANGES REFERENCE (Original→Final editing patterns):"
    lines = [header]
    used = len(header)

    for position, (before, after) in enumerate(tracked_changes_list):
        bullet = f'• "{before}" → "{after}"'
        projected = used + len(bullet)

        if projected <= max_length:
            lines.append(bullet)
            used = projected
            continue

        # Budget exhausted: note the truncation only if an example was kept.
        if position > 0:
            lines.append("(Additional examples truncated to save space)")
        break

    return "\n".join(lines) + "\n"