umpaper-fetch 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
umpaper_fetch/__init__.py CHANGED
@@ -5,7 +5,7 @@ This package provides tools to automatically download past year exam papers
5
5
  from University Malaya's repository through an automated browser interface.
6
6
  """
7
7
 
8
- __version__ = "1.0.2"
8
+ __version__ = "1.0.4"
9
9
  __author__ = "Marcus Mah"
10
10
  __email__ = "marcusmah6969@gmail.com"
11
11
  __description__ = "Automated downloader for University Malaya past year exam papers"
umpaper_fetch/cli.py CHANGED
@@ -95,7 +95,7 @@ Examples:
95
95
  parser.add_argument(
96
96
  '--version',
97
97
  action='version',
98
- version='%(prog)s 1.0.0'
98
+ version='%(prog)s 1.0.4'
99
99
  )
100
100
 
101
101
  return parser.parse_args()
@@ -311,14 +311,33 @@ def main():
311
311
  print(f"📁 Individual files: {output_dir / subject_code}")
312
312
 
313
313
  # Ask if user wants to delete individual files
314
- print(f"\n📁 Individual PDF files are still in: {output_dir / subject_code}")
314
+ subject_dir = output_dir / subject_code
315
+ print(f"\n📁 Individual PDF files are still in: {subject_dir}")
315
316
  delete_confirm = input("Delete individual files to save space? (y/N): ").strip().lower()
316
317
  if delete_confirm in ['y', 'yes']:
317
318
  try:
318
- import shutil
319
- shutil.rmtree(output_dir / subject_code)
320
- print("✅ Individual files deleted successfully")
321
- logger.info("Individual files deleted by user request")
319
+ # Delete individual PDF files
320
+ deleted_count = 0
321
+ for file_path in downloaded_files:
322
+ try:
323
+ file_path = Path(file_path)
324
+ if file_path.exists():
325
+ file_path.unlink()
326
+ deleted_count += 1
327
+ logger.debug(f"Deleted: {file_path}")
328
+ except Exception as file_error:
329
+ logger.warning(f"Could not delete {file_path}: {file_error}")
330
+
331
+ # Try to remove the subject directory if it's empty
332
+ try:
333
+ if subject_dir.exists() and not any(subject_dir.iterdir()):
334
+ subject_dir.rmdir()
335
+ logger.debug(f"Removed empty directory: {subject_dir}")
336
+ except Exception as dir_error:
337
+ logger.debug(f"Could not remove directory {subject_dir}: {dir_error}")
338
+
339
+ print(f"✅ Individual files deleted successfully ({deleted_count} files)")
340
+ logger.info(f"Individual files deleted by user request ({deleted_count} files)")
322
341
  except Exception as e:
323
342
  print(f"⚠️ Failed to delete individual files: {e}")
324
343
  logger.warning(f"Failed to delete individual files: {e}")
@@ -0,0 +1,363 @@
1
+ Metadata-Version: 2.4
2
+ Name: umpaper-fetch
3
+ Version: 1.0.4
4
+ Summary: Automated downloader for University Malaya past year exam papers
5
+ Home-page: https://github.com/MarcusMQF/umpaper-fetch
6
+ Author: Marcus Mah
7
+ Author-email: Marcus Mah <marcusmah6969@gmail.com>
8
+ License: MIT
9
+ Project-URL: Homepage, https://github.com/MarcusMQF/umpaper-fetch
10
+ Project-URL: Documentation, https://github.com/MarcusMQF/umpaper-fetch#readme
11
+ Project-URL: Repository, https://github.com/MarcusMQF/umpaper-fetch
12
+ Project-URL: Bug Reports, https://github.com/MarcusMQF/umpaper-fetch/issues
13
+ Keywords: university,malaya,um,exam,papers,downloader,automation,selenium
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Education
16
+ Classifier: Topic :: Education
17
+ Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
18
+ Classifier: Topic :: Utilities
19
+ Classifier: License :: OSI Approved :: MIT License
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.8
22
+ Classifier: Programming Language :: Python :: 3.9
23
+ Classifier: Programming Language :: Python :: 3.10
24
+ Classifier: Programming Language :: Python :: 3.11
25
+ Classifier: Programming Language :: Python :: 3.12
26
+ Classifier: Operating System :: OS Independent
27
+ Classifier: Environment :: Console
28
+ Requires-Python: >=3.8
29
+ Description-Content-Type: text/markdown
30
+ License-File: LICENSE
31
+ Requires-Dist: selenium>=4.15.2
32
+ Requires-Dist: requests>=2.31.0
33
+ Requires-Dist: beautifulsoup4>=4.12.2
34
+ Requires-Dist: webdriver-manager>=4.0.1
35
+ Requires-Dist: lxml>=4.9.3
36
+ Requires-Dist: urllib3>=2.0.7
37
+ Requires-Dist: certifi>=2023.7.22
38
+ Requires-Dist: tqdm>=4.66.1
39
+ Provides-Extra: dev
40
+ Requires-Dist: pytest>=7.0; extra == "dev"
41
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
42
+ Requires-Dist: black>=22.0; extra == "dev"
43
+ Requires-Dist: flake8>=5.0; extra == "dev"
44
+ Requires-Dist: mypy>=1.0; extra == "dev"
45
+ Dynamic: author
46
+ Dynamic: home-page
47
+ Dynamic: license-file
48
+ Dynamic: requires-python
49
+
50
+ # 🎓 Open Source UM Past Year Paper Downloader
51
+
52
+ **One-click bulk download solution for University Malaya (UM) past year exam papers**
53
+
54
+ Automate the tedious process of manually downloading past year papers one by one. Simply provide your UM credentials and subject code, and get all available papers in a single organized ZIP file.
55
+
56
+ [![PyPI version](https://badge.fury.io/py/umpaper-fetch.svg)](https://badge.fury.io/py/umpaper-fetch)
57
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
58
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
59
+
60
+ ---
61
+
62
+ ## 🚀 Quick Start (For Regular Users)
63
+
64
+ ### **Installation**
65
+ ```bash
66
+ # Install from PyPI
67
+ pip install umpaper-fetch
68
+
69
+ # Upgrade to latest version
70
+ pip install --upgrade umpaper-fetch
71
+ ```
72
+
73
+ ### **Basic Usage**
74
+ ```bash
75
+ # Run the downloader
76
+ python -m umpaper_fetch.cli
77
+
78
+ # Or with command-line shortcut (if available)
79
+ um-papers
80
+ ```
81
+
82
+ ### **First Run**
83
+ Follow the interactive prompts:
84
+ 1. Enter your UM username (without @siswa.um.edu.my)
85
+ 2. Enter your password securely
86
+ 3. Enter subject code (e.g., WIA1005)
87
+ 4. Choose download location
88
+ 5. Confirm download of found papers
89
+
90
+ ---
91
+
92
+ ## ✨ Key Features
93
+
94
+ ### 🚀 **Core Functionality**
95
+ - **🔄 One-Click Bulk Download**: Download all past year papers for any subject code automatically
96
+ - **📦 Smart ZIP Organization**: Automatically organizes papers by year and creates a structured ZIP archive
97
+ - **🔐 Secure Authentication**: Handles complex UM OpenAthens authentication flow seamlessly
98
+ - **⚡ Concurrent Downloads**: Multi-threaded downloading for faster performance
99
+ - **🔄 Auto-Retry Logic**: Robust error handling with configurable retry attempts
100
+ - **📊 Real-time Progress**: Live progress bars and detailed status updates
101
+
102
+ ### 📁 **File Organization**
103
+ - **📂 Hierarchical Structure**: Papers organized by subject → year → semester
104
+ - **🏷️ Smart File Naming**: Automatically detects and preserves meaningful filenames
105
+ - **📋 Auto-Generated README**: Includes download summary and paper inventory in ZIP
106
+ - **🗂️ Organized Output**: Individual PDFs + consolidated ZIP file
107
+ - **🧹 Optional Cleanup**: Choice to keep individual files or ZIP only
108
+
109
+ ### 🖥️ **User Experience**
110
+ - **📱 Terminal-Based Interface**: Clean, intuitive command-line interface
111
+ - **🎯 Interactive Mode**: Prompts for credentials and settings when needed
112
+ - **⚙️ Command-Line Mode**: Full automation with command-line arguments
113
+ - **📍 Custom Download Locations**: Choose where to save your papers
114
+ - **🔍 Browser Options**: Support for Edge, Chrome with auto-detection
115
+ - **📝 Comprehensive Logging**: Detailed logs for troubleshooting
116
+
117
+ ---
118
+
119
+ ## 📋 Complete Command Reference
120
+
121
+ ### **For Regular Users**
122
+
123
+ #### **Interactive Mode (Recommended for beginners)**
124
+ ```bash
125
+ python -m umpaper_fetch.cli
126
+ ```
127
+ *Prompts for all required information*
128
+
129
+ #### **Quick Commands**
130
+ ```bash
131
+ # With username and subject code
132
+ python -m umpaper_fetch.cli --username john_doe --subject-code WIA1005
133
+
134
+ # With custom output directory
135
+ python -m umpaper_fetch.cli -u student123 -s WXES1116 -o "C:/Downloads/Papers"
136
+
137
+ # Skip location prompt for automation
138
+ python -m umpaper_fetch.cli -s WIA1005 --no-location-prompt
139
+ ```
140
+
141
+ #### **Available Options**
142
+ | Command | Short | Description | Default |
143
+ |---------|-------|-------------|---------|
144
+ | `--username` | `-u` | UM username (without @siswa.um.edu.my) | *prompted* |
145
+ | `--subject-code` | `-s` | Subject code to search for (e.g., WIA1005) | *prompted* |
146
+ | `--output-dir` | `-o` | Custom download directory | `./downloads` |
147
+ | `--browser` | `-b` | Browser choice: `auto`, `chrome`, `edge` | `edge` |
148
+ | `--timeout` | | Session timeout in seconds | `30` |
149
+ | `--max-retries` | | Maximum retry attempts for failed downloads | `3` |
150
+ | `--no-location-prompt` | | Skip interactive location selection | `false` |
151
+ | `--verbose` | `-v` | Enable detailed debug logging | `false` |
152
+
153
+ ### **For Developers & Advanced Users**
154
+
155
+ #### **Development Installation**
156
+ ```bash
157
+ # Clone repository
158
+ git clone https://github.com/MarcusMQF/umpaper-fetch.git
159
+ cd umpaper-fetch
160
+
161
+ # Install in development mode
162
+ pip install -e .
163
+
164
+ # Install development dependencies
165
+ pip install -e .[dev]
166
+ ```
167
+
168
+ #### **Debug Commands**
169
+ ```bash
170
+ # Show browser window for debugging
171
+ python -m umpaper_fetch.cli --show-browser --verbose --subject-code WIA1005
172
+
173
+ # High-performance mode with extended timeouts
174
+ python -m umpaper_fetch.cli -s WIA1005 --max-retries 5 --timeout 60
175
+
176
+ # Force specific browser
177
+ python -m umpaper_fetch.cli --browser chrome --subject-code CSC1025
178
+ ```
179
+
180
+ #### **Developer Options**
181
+ | Command | Description | Use Case |
182
+ |---------|-------------|----------|
183
+ | `--show-browser` | Show browser window (disable headless mode) | Debugging authentication |
184
+ | `--verbose` | Enable detailed debug logging | Troubleshooting issues |
185
+ | `--timeout 60` | Extended session timeout | Slow connections |
186
+ | `--max-retries 5` | More retry attempts | Unstable connections |
187
+
188
+ ---
189
+
190
+ ## 💡 Tips for Best Experience
191
+
192
+ ### **Choose the Right Browser**
193
+ ```bash
194
+ # Windows users (recommended)
195
+ python -m umpaper_fetch.cli --browser edge --subject-code WIA1005
196
+
197
+ # Mac/Linux users
198
+ python -m umpaper_fetch.cli --browser chrome --subject-code WIA1005
199
+
200
+ # Auto-detect (fallback)
201
+ python -m umpaper_fetch.cli --browser auto --subject-code WIA1005
202
+ ```
203
+
204
+ ### **Optimize for Your Connection**
205
+ ```bash
206
+ # For slow/unstable connections
207
+ python -m umpaper_fetch.cli --timeout 90 --max-retries 5 --subject-code WIA1005
208
+
209
+ # For fast connections
210
+ python -m umpaper_fetch.cli --timeout 15 --max-retries 2 --subject-code WIA1005
211
+ ```
212
+
213
+ ### **Batch Processing Multiple Subjects**
214
+ ```bash
215
+ # Process multiple subjects
216
+ python -m umpaper_fetch.cli -s WIA1005 --no-location-prompt -o "./Papers/WIA1005"
217
+ python -m umpaper_fetch.cli -s WXES1116 --no-location-prompt -o "./Papers/WXES1116"
218
+ python -m umpaper_fetch.cli -s CSC1025 --no-location-prompt -o "./Papers/CSC1025"
219
+ ```
220
+
221
+ ### **Automation-Friendly Commands**
222
+ ```bash
223
+ # Fully automated (only prompts for password)
224
+ python -m umpaper_fetch.cli -u your_username -s WIA1005 --no-location-prompt -o "./Papers"
225
+
226
+ # Silent mode with custom browser
227
+ python -m umpaper_fetch.cli -u your_username -s WXES1116 --browser edge --no-location-prompt
228
+ ```
229
+
230
+ ---
231
+
232
+ ## 📊 What You Get
233
+
234
+ ### **Organized File Structure**
235
+ ```
236
+ 📁 downloads/
237
+ ├── 📁 WIA1005/
238
+ │ ├── 📁 Year_2023/
239
+ │ │ ├── WIA1005_Final_2023_S1.pdf
240
+ │ │ └── WIA1005_Final_2023_S2.pdf
241
+ │ ├── 📁 Year_2022/
242
+ │ │ ├── WIA1005_Final_2022_S1.pdf
243
+ │ │ └── WIA1005_Final_2022_S2.pdf
244
+ │ └── 📁 Unsorted/
245
+ │ └── WIA1005_Additional_Papers.pdf
246
+ ├── 📦 WIA1005_past_years.zip
247
+ └── 📄 WIA1005_README.txt
248
+ ```
249
+
250
+ ### **ZIP Archive Contents**
251
+ - **Hierarchical Organization**: Subject → Year → Files
252
+ - **Automatic README**: Download summary and file inventory
253
+ - **Optimized Compression**: Balanced compression for size/speed
254
+ - **Preserved Metadata**: Original filenames and dates maintained
255
+
256
+ ---
257
+
258
+ ## 🔧 Prerequisites & Setup
259
+
260
+ ### **System Requirements**
261
+ - **Python 3.8+** installed
262
+ - **Internet connection** (stable recommended)
263
+ - **UM student account** with active credentials
264
+ - **Browser**: Microsoft Edge (Windows) or Google Chrome (Mac/Linux)
265
+
266
+ ### **Browser Setup**
267
+ - **Windows**: Microsoft Edge (pre-installed, recommended)
268
+ - **Mac/Linux**: Google Chrome (install from google.com/chrome)
269
+ - **Auto-detection**: Tool will find the best available browser
270
+
271
+ ### **Firewall/Network**
272
+ - Tool connects to `exampaper.um.edu.my` via HTTPS
273
+ - No special firewall configuration needed
274
+ - Works on UM campus network and external networks
275
+
276
+ ---
277
+
278
+ ## 🎯 Quick Command Cheat Sheet
279
+
280
+ ### **For Regular Users**
281
+ ```bash
282
+ # Install and run
283
+ pip install umpaper-fetch
284
+ python -m umpaper_fetch.cli
285
+
286
+ # Quick download with subject code
287
+ python -m umpaper_fetch.cli -s WIA1005
288
+
289
+ # Custom download location
290
+ python -m umpaper_fetch.cli -s WIA1005 -o "C:/MyPapers"
291
+
292
+ # Batch mode (no prompts except password)
293
+ python -m umpaper_fetch.cli -u your_username -s WIA1005 --no-location-prompt
294
+ ```
295
+
296
+ ### **For Developers**
297
+ ```bash
298
+ # Development setup
299
+ git clone https://github.com/MarcusMQF/umpaper-fetch.git
300
+ cd umpaper-fetch
301
+ pip install -e .[dev]
302
+
303
+ # Debug mode
304
+ python -m umpaper_fetch.cli --show-browser --verbose -s WIA1005
305
+
306
+ # Performance testing
307
+ python -m umpaper_fetch.cli --max-retries 5 --timeout 60 -s WXES1116
308
+ ```
309
+
310
+ ---
311
+
312
+ ## 🔒 Security & Privacy
313
+
314
+ ### **What We Do**
315
+ - ✅ Use secure HTTPS connections only
316
+ - ✅ Handle UM authentication through official channels
317
+ - ✅ Clean up browser data after each session
318
+ - ✅ Never store or log passwords
319
+ - ✅ Respect server rate limits
320
+
321
+ ### **What We Don't Do**
322
+ - ❌ Store credentials anywhere
323
+ - ❌ Bypass security measures
324
+ - ❌ Access unauthorized content
325
+ - ❌ Share or transmit personal data
326
+ - ❌ Violate UM terms of service
327
+
328
+ ---
329
+
330
+ ## ⚖️ Legal & Academic Use
331
+
332
+ **Educational Purpose Only**: This tool is designed for UM students to efficiently access past year papers for their studies. Users must:
333
+ - Have valid UM credentials
334
+ - Comply with UM's terms of service
335
+ - Use papers for academic purposes only
336
+ - Respect copyright and intellectual property rights
337
+
338
+ **Disclaimer**: This is an unofficial tool not affiliated with University Malaya.
339
+
340
+ ---
341
+
342
+ ## 🤝 Support & Contributing
343
+
344
+ ### **Get Help**
345
+ - 📖 Check this README for common usage patterns
346
+ - 🐛 Report issues on [GitHub Issues](https://github.com/MarcusMQF/umpaper-fetch/issues)
347
+ - 💡 Request features via GitHub Issues
348
+
349
+ ### **Contributing**
350
+ Contributions welcome! Please:
351
+ 1. Fork the repository
352
+ 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
353
+ 3. Make your changes with tests
354
+ 4. Submit a pull request
355
+ 5. Follow existing code style
356
+
357
+ ### **Development Setup**
358
+ ```bash
359
+ git clone https://github.com/MarcusMQF/umpaper-fetch.git
360
+ cd umpaper-fetch
361
+ pip install -e .[dev]
362
+ pytest # Run tests
363
+ ```
@@ -5,8 +5,8 @@ downloader/__init__.py,sha256=Qq3Oex541SjsJBe4Tx2lR7BjCHbPZaGGAscWaU3gUB4,21
5
5
  downloader/pdf_downloader.py,sha256=5g9_suFe6rG7Vjny7gScRkCSVPiYxKwB7NH8PTPR2Z0,7957
6
6
  scraper/__init__.py,sha256=cNGEi--R5BTUOwpnANA2BsR2yfnmG9qrwagEOtzI0E4,18
7
7
  scraper/paper_scraper.py,sha256=NNnJ6eFxgueJfYwZFbTbsnxMAUUbIM0A72q0QNubaVk,12970
8
- umpaper_fetch/__init__.py,sha256=P8krJn88eUBf0TcuYrZ3DWRw_nSEiN0BHPBQ0WuxVKQ,830
9
- umpaper_fetch/cli.py,sha256=zow8oPtZBzsQnSgdaBTpoh8qPJ9-pHygSPtKRvGS2Z0,12435
8
+ umpaper_fetch/__init__.py,sha256=kjuy6GZAn-kyDuctTUKHK89qFLOU3f18AncMRbSjJcw,830
9
+ umpaper_fetch/cli.py,sha256=CSX3Z29MN8TrTNJpp6SfMmZnXo8bHLJSivsL5ANSn0w,13545
10
10
  umpaper_fetch/auth/__init__.py,sha256=AMRDJpqoFyQt3kcRcihx7LMAIFmWeJe_wfy-i4JyUXs,25
11
11
  umpaper_fetch/auth/chrome_fix.py,sha256=sSTvUIWLme5e-raXCM_AJ2IkK3PHSrxNv_g7itSolkg,3828
12
12
  umpaper_fetch/auth/um_authenticator.py,sha256=Rii9E6-2MvE_orcMW4u93tSY8QaT21P_HJeJPExDUzQ,24165
@@ -17,12 +17,12 @@ umpaper_fetch/scraper/paper_scraper.py,sha256=NNnJ6eFxgueJfYwZFbTbsnxMAUUbIM0A72
17
17
  umpaper_fetch/utils/__init__.py,sha256=oukU0ufroPRd8_N8d2xiFes9CTxSaw4NA6p2nS1kkSg,16
18
18
  umpaper_fetch/utils/logger.py,sha256=LJhnN3KoAB9bX4BdfK6bk2urnQEVwG1FFlOeFGbVg7U,2098
19
19
  umpaper_fetch/utils/zip_creator.py,sha256=44dnsMdJRvOktjl4ajDqaxTiYfznbkERzzpcnGogS5I,10731
20
- umpaper_fetch-1.0.2.dist-info/licenses/LICENSE,sha256=bwdpp_9mjxdcCEY-7f8XVEQDoyImNLNDUtYGezKFbFk,1113
20
+ umpaper_fetch-1.0.4.dist-info/licenses/LICENSE,sha256=bwdpp_9mjxdcCEY-7f8XVEQDoyImNLNDUtYGezKFbFk,1113
21
21
  utils/__init__.py,sha256=oukU0ufroPRd8_N8d2xiFes9CTxSaw4NA6p2nS1kkSg,16
22
22
  utils/logger.py,sha256=LJhnN3KoAB9bX4BdfK6bk2urnQEVwG1FFlOeFGbVg7U,2098
23
23
  utils/zip_creator.py,sha256=Mr5rT52vEyNQJB0Cg99r6QTNJBnzqGnnqd3wPK4uaxY,10704
24
- umpaper_fetch-1.0.2.dist-info/METADATA,sha256=qKzBI702Rwn9N64zUmCkZproH-kErAvWt8nwXo7jSSg,15372
25
- umpaper_fetch-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
26
- umpaper_fetch-1.0.2.dist-info/entry_points.txt,sha256=8XnqR8M2yO02n6DcUdJXoSVpgBG4TvaSRNBqi4A_XcQ,53
27
- umpaper_fetch-1.0.2.dist-info/top_level.txt,sha256=NU9S9Nnj8CJvOZvkoKbicMNk0apwlHJjdL6yjU7pnWs,44
28
- umpaper_fetch-1.0.2.dist-info/RECORD,,
24
+ umpaper_fetch-1.0.4.dist-info/METADATA,sha256=vfZaVtkZW5A_wTIwsC6SQcr4wTf--swv1FnUbsM9CXI,12296
25
+ umpaper_fetch-1.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
26
+ umpaper_fetch-1.0.4.dist-info/entry_points.txt,sha256=8XnqR8M2yO02n6DcUdJXoSVpgBG4TvaSRNBqi4A_XcQ,53
27
+ umpaper_fetch-1.0.4.dist-info/top_level.txt,sha256=NU9S9Nnj8CJvOZvkoKbicMNk0apwlHJjdL6yjU7pnWs,44
28
+ umpaper_fetch-1.0.4.dist-info/RECORD,,
@@ -1,462 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: umpaper-fetch
3
- Version: 1.0.2
4
- Summary: Automated downloader for University Malaya past year exam papers
5
- Home-page: https://github.com/MarcusMQF/umpaper-fetch
6
- Author: Marcus Mah
7
- Author-email: Marcus Mah <marcusmah6969@gmail.com>
8
- License: MIT
9
- Project-URL: Homepage, https://github.com/MarcusMQF/umpaper-fetch
10
- Project-URL: Documentation, https://github.com/MarcusMQF/umpaper-fetch#readme
11
- Project-URL: Repository, https://github.com/MarcusMQF/umpaper-fetch
12
- Project-URL: Bug Reports, https://github.com/MarcusMQF/umpaper-fetch/issues
13
- Keywords: university,malaya,um,exam,papers,downloader,automation,selenium
14
- Classifier: Development Status :: 4 - Beta
15
- Classifier: Intended Audience :: Education
16
- Classifier: Topic :: Education
17
- Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
18
- Classifier: Topic :: Utilities
19
- Classifier: License :: OSI Approved :: MIT License
20
- Classifier: Programming Language :: Python :: 3
21
- Classifier: Programming Language :: Python :: 3.8
22
- Classifier: Programming Language :: Python :: 3.9
23
- Classifier: Programming Language :: Python :: 3.10
24
- Classifier: Programming Language :: Python :: 3.11
25
- Classifier: Programming Language :: Python :: 3.12
26
- Classifier: Operating System :: OS Independent
27
- Classifier: Environment :: Console
28
- Requires-Python: >=3.8
29
- Description-Content-Type: text/markdown
30
- License-File: LICENSE
31
- Requires-Dist: selenium>=4.15.2
32
- Requires-Dist: requests>=2.31.0
33
- Requires-Dist: beautifulsoup4>=4.12.2
34
- Requires-Dist: webdriver-manager>=4.0.1
35
- Requires-Dist: lxml>=4.9.3
36
- Requires-Dist: urllib3>=2.0.7
37
- Requires-Dist: certifi>=2023.7.22
38
- Requires-Dist: tqdm>=4.66.1
39
- Provides-Extra: dev
40
- Requires-Dist: pytest>=7.0; extra == "dev"
41
- Requires-Dist: pytest-cov>=4.0; extra == "dev"
42
- Requires-Dist: black>=22.0; extra == "dev"
43
- Requires-Dist: flake8>=5.0; extra == "dev"
44
- Requires-Dist: mypy>=1.0; extra == "dev"
45
- Dynamic: author
46
- Dynamic: home-page
47
- Dynamic: license-file
48
- Dynamic: requires-python
49
-
50
- # 🎓 UM Past Year Paper Downloader - PaperFetch
51
-
52
- **One-click bulk download solution for University Malaya (UM) past year exam papers**
53
-
54
- Automate the tedious process of manually downloading past year papers one by one. Simply provide your UM credentials and subject code, and get all available papers in a single organized ZIP file.
55
-
56
- ---
57
-
58
- ## ✨ Key Features
59
-
60
- ### 🚀 **Core Functionality**
61
- - **🔄 One-Click Bulk Download**: Download all past year papers for any subject code automatically
62
- - **📦 Smart ZIP Organization**: Automatically organizes papers by year and creates a structured ZIP archive
63
- - **🔐 Secure Authentication**: Handles complex UM OpenAthens authentication flow seamlessly
64
- - **⚡ Concurrent Downloads**: Multi-threaded downloading for faster performance
65
- - **🔄 Auto-Retry Logic**: Robust error handling with configurable retry attempts
66
- - **📊 Real-time Progress**: Live progress bars and detailed status updates
67
-
68
- ### 📁 **File Organization**
69
- - **📂 Hierarchical Structure**: Papers organized by subject → year → semester
70
- - **🏷️ Smart File Naming**: Automatically detects and preserves meaningful filenames
71
- - **📋 Auto-Generated README**: Includes download summary and paper inventory in ZIP
72
- - **🗂️ Organized Output**: Individual PDFs + consolidated ZIP file
73
- - **🧹 Optional Cleanup**: Choice to keep individual files or ZIP only
74
-
75
- ### 🖥️ **User Experience**
76
- - **📱 Terminal-Based Interface**: Clean, intuitive command-line interface
77
- - **🎯 Interactive Mode**: Prompts for credentials and settings when needed
78
- - **⚙️ Command-Line Mode**: Full automation with command-line arguments
79
- - **📍 Custom Download Locations**: Choose where to save your papers
80
- - **🔍 Browser Options**: Support for Edge, Chrome with auto-detection
81
- - **📝 Comprehensive Logging**: Detailed logs for troubleshooting
82
-
83
- ### 🔒 **Security & Reliability**
84
- - **🛡️ Secure Password Input**: Hidden password entry (never stored/logged)
85
- - **🧹 Session Cleanup**: Automatic browser data cleanup after use
86
- - **✅ Download Verification**: Validates PDF integrity after download
87
- - **🔐 HTTPS Enforcement**: Secure connections to UM servers
88
- - **⏱️ Configurable Timeouts**: Customizable session and download timeouts
89
-
90
- ---
91
-
92
- ## 📋 Complete Command Reference
93
-
94
- ### **Available Commands (9 total)**
95
-
96
- | Command | Short | Description | Default |
97
- |---------|-------|-------------|---------|
98
- | `--username` | `-u` | UM username (without @siswa.um.edu.my) | *prompted* |
99
- | `--subject-code` | `-s` | Subject code to search for (e.g., WIA1005) | *prompted* |
100
- | `--output-dir` | `-o` | Custom download directory | `./downloads` |
101
- | `--browser` | `-b` | Browser choice: `auto`, `chrome`, `edge` | `edge` |
102
- | `--timeout` | | Session timeout in seconds | `30` |
103
- | `--max-retries` | | Maximum retry attempts for failed downloads | `3` |
104
- | `--show-browser` | | Show browser window (disable headless mode) | `false` |
105
- | `--no-location-prompt` | | Skip interactive location selection | `false` |
106
- | `--verbose` | `-v` | Enable detailed debug logging | `false` |
107
-
108
- ### **Usage Examples**
109
-
110
- **1. Interactive Mode (Recommended for first-time users)**
111
- ```bash
112
- python main.py
113
- ```
114
- *Prompts for username, password, subject code, and download location*
115
-
116
- **2. Quick Command-Line Mode**
117
- ```bash
118
- python main.py --username john_doe --subject-code WIA1005
119
- ```
120
- *Only prompts for password*
121
-
122
- **3. Fully Automated Mode**
123
- ```bash
124
- python main.py -u student123 -s WXES1116 -o "C:/Downloads/Papers" --no-location-prompt
125
- ```
126
- *No prompts except secure password entry*
127
-
128
- **4. Debug Mode with Visible Browser**
129
- ```bash
130
- python main.py --subject-code WIA1005 --show-browser --verbose
131
- ```
132
- *Shows browser actions and detailed logging*
133
-
134
- **5. High-Performance Mode**
135
- ```bash
136
- python main.py -s WIA1005 --max-retries 5 --timeout 60
137
- ```
138
- *Extended timeouts and retries for slow connections*
139
-
140
- **6. Custom Browser Selection**
141
- ```bash
142
- python main.py --browser chrome --subject-code CSC1025
143
- ```
144
- *Force use of Chrome browser*
145
-
146
- ---
147
-
148
- ## 🚀 Quick Start Guide
149
-
150
- ### **Prerequisites**
151
- - Python 3.8+ installed
152
- - One of these browsers: **Microsoft Edge** (recommended), Google Chrome
153
- - UM student account with active credentials
154
- - Stable internet connection
155
-
156
- ### **Installation**
157
- ```bash
158
- # 1. Clone/download this repository
159
- git clone <repository-url>
160
- cd um-past-year-downloader
161
-
162
- # 2. Install dependencies
163
- pip install -r requirements.txt
164
-
165
- # 3. Ready to use!
166
- python main.py
167
- ```
168
-
169
- ### **First Run**
170
- ```bash
171
- python main.py
172
- ```
173
- Follow the interactive prompts:
174
- 1. Enter your UM username (without @siswa.um.edu.my)
175
- 2. Enter your password securely
176
- 3. Enter subject code (e.g., WIA1005)
177
- 4. Choose download location
178
- 5. Confirm download of found papers
179
-
180
- ---
181
-
182
- ## 📊 What You Get
183
-
184
- ### **Organized File Structure**
185
- ```
186
- 📁 downloads/
187
- ├── 📁 WIA1005/
188
- │ ├── 📁 Year_2023/
189
- │ │ ├── WIA1005_Final_2023_S1.pdf
190
- │ │ └── WIA1005_Final_2023_S2.pdf
191
- │ ├── 📁 Year_2022/
192
- │ │ ├── WIA1005_Final_2022_S1.pdf
193
- │ │ └── WIA1005_Final_2022_S2.pdf
194
- │ └── 📁 Unsorted/
195
- │ └── WIA1005_Additional_Papers.pdf
196
- ├── 📦 WIA1005_past_years.zip
197
- └── 📄 WIA1005_README.txt
198
- ```
199
-
200
- ### **ZIP Archive Contents**
201
- - **Hierarchical Organization**: Subject → Year → Files
202
- - **Automatic README**: Download summary and file inventory
203
- - **Optimized Compression**: Balanced compression for size/speed
204
- - **Preserve Metadata**: Original filenames and dates maintained
205
-
206
- ### **Generated Reports**
207
- - **Download Summary**: Shows total papers found and downloaded
208
- - **Failed Downloads**: Lists any papers that couldn't be downloaded
209
- - **File Inventory**: Complete list of papers with years and types
210
- - **Timestamp**: When the download was performed
211
-
212
- ---
213
-
214
- ## 🔧 Advanced Configuration
215
-
216
- ### **Browser Selection Guide**
217
-
218
- | Browser | Best For | Advantages | Notes |
219
- |---------|----------|------------|-------|
220
- | **Edge** | Windows users | Built-in, no driver conflicts, memory efficient | **Recommended** |
221
- | **Chrome** | Mac/Linux users | Wide compatibility, stable | May need driver updates |
222
- | **Auto** | Uncertain | Detects best available | Falls back to Edge → Chrome |
223
-
224
- ### **Performance Tuning**
225
- ```bash
226
- # For slow connections
227
- python main.py --timeout 60 --max-retries 5
228
-
229
- # For fast connections
230
- python main.py --timeout 15 --max-retries 2
231
-
232
- # For debug/troubleshooting
233
- python main.py --verbose --show-browser
234
- ```
235
-
236
- ### **Output Directory Options**
237
- - **Default**: `./downloads` (project folder)
238
- - **Custom**: Any valid path (e.g., `C:/Users/Student/Papers`)
239
- - **Interactive**: Choose during runtime
240
- - **Auto**: Use `--no-location-prompt` to skip selection
241
-
242
- ---
243
-
244
- ## 🧪 Testing & Validation
245
-
246
- ### **Built-in Test Scripts**
247
-
248
- **1. Complete System Test**
249
- ```bash
250
- python test_setup.py
251
- ```
252
- *Tests Python environment, dependencies, browser drivers, and network connectivity*
253
-
254
- **2. Authentication Test**
255
- ```bash
256
- python test_login.py
257
- ```
258
- *Tests only the UM login process (useful for credential verification)*
259
-
260
- **3. Search Functionality Test**
261
- ```bash
262
- python test_search_debug.py
263
- ```
264
- *Tests paper search without downloading*
265
-
266
- ### **Validation Features**
267
- - **PDF Integrity Check**: Verifies downloaded files are valid PDFs
268
- - **Size Validation**: Ensures files aren't empty or corrupted
269
- - **Download Verification**: Confirms all expected papers were downloaded
270
- - **ZIP Integrity**: Validates ZIP file creation and contents
271
-
272
- ---
273
-
274
- ## 🛠️ Technical Architecture
275
-
276
- ### **Modular Components**
277
-
278
- **1. Authentication (`auth/um_authenticator.py`)**
279
- - Handles complex UM OpenAthens SAML authentication
280
- - Manages session cookies and security tokens
281
- - Supports multiple browser backends
282
-
283
- **2. Paper Discovery (`scraper/paper_scraper.py`)**
284
- - Searches UM repository by subject code
285
- - Extracts paper metadata (year, semester, type)
286
- - Handles pagination and result filtering
287
-
288
- **3. Download Engine (`downloader/pdf_downloader.py`)**
289
- - Concurrent multi-threaded downloads
290
- - Progress tracking with visual indicators
291
- - Retry logic with exponential backoff
292
- - File integrity validation
293
-
294
- **4. Archive Creator (`utils/zip_creator.py`)**
295
- - Intelligent file organization by year/semester
296
- - Optimized compression algorithms
297
- - Auto-generated documentation
298
- - Metadata preservation
299
-
300
- **5. Logging System (`utils/logger.py`)**
301
- - Structured logging with multiple levels
302
- - Separate log files for debugging
303
- - Performance metrics and timing
304
-
305
- ### **Dependencies**
306
- - `selenium` - Web automation and browser control
307
- - `requests` - HTTP session management
308
- - `beautifulsoup4` - HTML parsing and data extraction
309
- - `tqdm` - Progress bars and status indicators
310
- - `webdriver-manager` - Automatic browser driver management
311
-
312
- ---
313
-
314
- ## 🚨 Troubleshooting
315
-
316
- ### **Common Issues & Solutions**
317
-
318
- **❌ Login Failed**
319
- - ✅ Verify username/password are correct
320
- - ✅ Check if your UM account is active
321
- - ✅ Try using Edge browser: `--browser edge`
322
- - ✅ Enable debug mode: `--verbose --show-browser`
323
-
324
- **❌ No Papers Found**
325
- - ✅ Verify subject code is correct (e.g., WIA1005, not wia1005)
326
- - ✅ Check if papers exist for that subject
327
- - ✅ Try different semester/year variations
328
-
329
- **❌ Download Errors**
330
- - ✅ Check internet connection stability
331
- - ✅ Increase timeout: `--timeout 60`
332
- - ✅ Increase retries: `--max-retries 5`
333
- - ✅ Check disk space in output directory
334
-
335
- **❌ Browser/WebDriver Issues**
336
- - ✅ **Windows users**: Use Edge first: `--browser edge`
337
- - ✅ Update browser to latest version
338
- - ✅ Try: `pip install --upgrade webdriver-manager`
339
- - ✅ See `TROUBLESHOOTING.md` for detailed solutions
340
-
341
- ### **Exit Codes**
342
- - `0` - Success
343
- - `1` - Authentication failure
344
- - `2` - Network connectivity issues
345
- - `3` - No papers found or download failed
346
- - `4` - File system permissions error
347
- - `130` - User cancelled (Ctrl+C)
348
-
349
- ---
350
-
351
- ## 📈 Performance Metrics
352
-
353
- ### **Typical Performance**
354
- - **Authentication**: 5-10 seconds
355
- - **Paper Search**: 2-5 seconds
356
- - **Download Speed**: 2-5 MB/s per file (concurrent)
357
- - **ZIP Creation**: 1-3 seconds
358
- - **Total Time**: 30 seconds - 2 minutes (depending on paper count)
359
-
360
- ### **Optimization Features**
361
- - **Concurrent Downloads**: Up to 4 simultaneous downloads
362
- - **Intelligent Caching**: Avoids re-downloading existing files
363
- - **Compressed Archives**: ZIP compression reduces file size by 10-30%
364
- - **Progress Tracking**: Real-time ETA and speed indicators
365
-
366
- ---
367
-
368
- ## ⚖️ Legal & Academic Use
369
-
370
- ### **Terms of Use**
371
- - ✅ **Educational Purpose Only**: For UM students' academic use
372
- - ✅ **Respect UM Policies**: Adheres to university terms of service
373
- - ✅ **No Circumvention**: Uses standard authentication methods
374
- - ✅ **Rate Limiting**: Respects server load limits
375
- - ✅ **Valid Credentials Required**: Must have active UM account
376
-
377
- ### **What This Tool Does NOT Do**
378
- - ❌ Bypass any security measures
379
- - ❌ Access restricted content
380
- - ❌ Store or share credentials
381
- - ❌ Violate copyright or academic policies
382
- - ❌ Access content you don't have permission for
383
-
384
- ---
385
-
386
- ## 💡 Tips for Best Experience
387
-
388
- ### **For Windows Users**
389
- ```bash
390
- # Recommended command for Windows
391
- python main.py --browser edge --subject-code WIA1005
392
- ```
393
-
394
- ### **For Mac/Linux Users**
395
- ```bash
396
- # Recommended command for Mac/Linux
397
- python main.py --browser chrome --subject-code WIA1005
398
- ```
399
-
400
- ### **For Slow Connections**
401
- ```bash
402
- python main.py --timeout 90 --max-retries 5 --subject-code WIA1005
403
- ```
404
-
405
- ### **For Batch Processing**
406
- ```bash
407
- # Create a batch script for multiple subjects
408
- python main.py -s WIA1005 --no-location-prompt -o "./Papers/WIA1005"
409
- python main.py -s WXES1116 --no-location-prompt -o "./Papers/WXES1116"
410
- ```
411
-
412
- ---
413
-
414
- ## 🤝 Support & Contributing
415
-
416
- ### **Getting Help**
417
- 1. **📖 Read `TROUBLESHOOTING.md`** - Comprehensive solution guide
418
- 2. **🔍 Check logs** - Review log files for detailed error information
419
- 3. **🧪 Run tests** - Use `python test_setup.py` to validate environment
420
- 4. **🔄 Try Edge browser** - Often resolves driver issues: `--browser edge`
421
-
422
- ### **Contributing**
423
- Contributions welcome! Please:
424
- 1. Fork the repository
425
- 2. Create a feature branch
426
- 3. Make your changes with tests
427
- 4. Submit a pull request
428
- 5. Follow existing code style and documentation standards
429
-
430
- ---
431
-
432
- ## 📄 Disclaimer
433
-
434
- This tool is an unofficial utility created to help UM students access past year papers more efficiently. It is not affiliated with or endorsed by University Malaya. Users are responsible for complying with UM's terms of service and academic policies.
435
-
436
- ---
437
-
438
- ## 🎯 Quick Command Cheat Sheet
439
-
440
- ```bash
441
- # Basic usage
442
- python main.py
443
-
444
- # Fast automated mode
445
- python main.py -u username -s WIA1005 --no-location-prompt
446
-
447
- # Debug mode
448
- python main.py --verbose --show-browser -s WIA1005
449
-
450
- # Slow or unreliable connection (more retries, longer timeout)
451
- python main.py --max-retries 5 --timeout 60 -s WXES1116
452
-
453
- # Custom location
454
- python main.py -o "C:/Papers" -s CSC1025
455
-
456
- # Windows optimized
457
- python main.py --browser edge -s WIA1005
458
- ```
459
-
460
- ---
461
-
462
- *Time to lock in for your finals!*